The purpose of this markdown is to report clinical accuracy by displaying the Error Grid analysis, computing logistic regression on “well-matched” pairs, calculating Cohen’s Kappa, and reporting our “transfusion test” results.
First we load the necessary packages:
# Attach every package this report depends on.
# `library()` is used rather than `require()`: `require()` only warns and
# returns FALSE when a package is missing, which lets the report continue and
# fail later with a less helpful error; `library()` stops immediately.
suppressPackageStartupMessages({
  # Data frame manipulation
  library(dplyr)
  # Graphics and output
  library(ggplot2)
  # Tables
  library(knitr)
  library(kableExtra)
  # Error grid point allocation
  library(ptinpoly)
})
Ensure the environmental variables are specified:
# Stop early when any environment variable that configures this run is unset
# or empty (`Sys.getenv()` returns '' for an unset variable), then echo the
# site name.
if (any(Sys.getenv(c('PICU_LAB_DATA_PATH',
                     'PICU_LAB_IMG_PATH',
                     'PICU_LAB_IN_FILE',
                     'PICU_LAB_SITE_NAME',
                     'PICU_LAB_RUN_DATE')) == '')) {
  stop('Missing necessary environmental variables - see README.md')
}
cat(sprintf('Site: %s\n', Sys.getenv('PICU_LAB_SITE_NAME')))
## Site: CHOP
Specify the run date:
# Read the run date from the environment and echo it
run.date <- Sys.getenv('PICU_LAB_RUN_DATE')
cat('Run Date: ', run.date, '\n', sep = '')
## Run Date: 2022-12-22
Load data from the DATA_PATH with the associated IN_FILE, adding a file separator between them. This should result in loading two data frames: cohort.df and labs.df.
# Load the input .RData file found at <PICU_LAB_DATA_PATH>/<PICU_LAB_IN_FILE>.
# `load()` returns the names of the objects it restored; these are checked so
# that a wrong or incomplete input file fails here, with a clear message,
# rather than much later inside one of the analysis functions.
loaded.objects <- load(
  file = file.path(
    Sys.getenv('PICU_LAB_DATA_PATH'),
    Sys.getenv('PICU_LAB_IN_FILE')
  )
)
if (!all(c('cohort.df', 'labs.df') %in% loaded.objects))
  stop('Input file must contain both `cohort.df` and `labs.df`')
# Remove the temporary so only the loaded data remains in the session
rm(loaded.objects)
We will utilize several sensitivity analyses in this markdown - these should be identical to the sensitivity indicators in the prior markdown 02_Analytic_Accuracy.Rmd. No changes should be made to these parameters - only additions for more sensitivity parameters.
# Maximum gap (in minutes) between two collection times for the samples to be
# treated as "simultaneous" in the primary analysis
primary.cutoff <- 15

# Alternative gaps (in minutes) used for the sensitivity analyses
sens.cutoffs <- c(1, 30, 90)

# Hgb cutoffs: the primary value, followed by the sensitivity values
primary.hgb.cutoff <- 7
sens.hgb.cutoffs <- c(5, 9)
The below join function is a copy of the function used in 02_Analytic_Accuracy.Rmd. No changes should be made to this version - make changes to the prior version, re-test within that script, and then copy here. After development, this will be moved to a package.
#'
#' @title Create Paired Dataset
#'
#' @description Creates a dataset of paired simultaneous lab values
#'
#' @details Rows of `labs.df` for component `CN` are split by `PROC_NAME`
#' into a PN[1] set (suffix `.x`) and a PN[2] set (suffix `.y`), joined
#' within encounter, kept when the collection times differ by strictly
#' less than `time.diff` minutes, and then de-duplicated so that each PN[1]
#' order and each PN[2] order appears at most once. Row counts are printed
#' after every stage.
#'
#' @param labs.df The labs data frame
#' @param cohort.df The cohort data frame, needed for PAT_KEY and DEPT
#' @param PN A two-element vector of PROC_NAMEs to join; PN[1] becomes the
#' `.x` columns and PN[2] the `.y` columns
#' @param time.diff The max time difference (min) between collected times
#' (exclusive: pairs exactly `time.diff` apart are dropped)
#' @param CN The COMP_NAME to join [Default: 'Hgb']
#' @param multi.per.pt If FALSE, limit to first result per patient, otherwise
#' if TRUE [Default], allow all
#'
#' @returns The resulting joined data frame, one row per retained pair
#'
createPairedDataset <- function (labs.df, cohort.df, PN, time.diff,
CN = 'Hgb', multi.per.pt = T) {
# Keep rows with a usable numeric value for the requested component.
# NOTE(review): 9999999 is presumably a sentinel for a cancelled or
# non-numeric result - confirm against the data dictionary.
filter.df <-
labs.df %>%
dplyr::filter(!is.na(NUM_VAL) & NUM_VAL != 9999999.) %>%
dplyr::filter(COMP_NAME == CN)
cat(sprintf('Number of component numeric rows in input data frame: %d\n',
nrow(filter.df)))
# Join to get PAT_KEY and DEPT, used in subsequent filtering
keyed.df <-
dplyr::left_join(
x = filter.df,
y = cohort.df %>%
dplyr::select(ENC_KEY, PAT_KEY, DEPT),
by = c('ENC_KEY')
)
# Split by PROC_NAME and pair every PN[1] row with every PN[2] row of the same
# encounter; columns present on both sides receive the .x / .y suffixes.
# AGE_PROC is carried from the PN[1] side only.
joined.df <-
dplyr::inner_join(
x = keyed.df %>%
dplyr::filter(PROC_NAME == PN[1]) %>%
dplyr::select(ENC_KEY, PAT_KEY, ORDER_PROC_KEY,
DEPT, COLLECTED_DT, RESULT_DT, NUM_VAL, AGE_PROC),
y = keyed.df %>%
dplyr::filter(PROC_NAME == PN[2]) %>%
dplyr::select(ENC_KEY, PAT_KEY, ORDER_PROC_KEY,
DEPT, COLLECTED_DT, RESULT_DT, NUM_VAL),
by = c('ENC_KEY', 'PAT_KEY', 'DEPT'),
suffix = c('.x', '.y')
)
# Signed collection-time difference in minutes (PN[1] minus PN[2]), computed
# with base R by column position:
# [[5]] is PN[1] COLLECTED_DT
# [[10]] is PN[2] COLLECTED_DT
# NOTE(review): these positions depend on the exact column order of the two
# select() calls above; any change there silently picks different columns.
joined.df$COLL_TIME_DIFF_MIN <-
as.numeric(joined.df[[5]] - joined.df[[10]], units = 'mins')
# Apply the cutoff time (strictly less than `time.diff`, in either direction)
cutoff.df <-
joined.df %>%
dplyr::filter(abs(COLL_TIME_DIFF_MIN) < time.diff)
cat(sprintf('Number of paired, simultaneous values meeting cutoff: %d\n',
nrow(cutoff.df)))
# Ensure that each first PROC_NAME order is only used once - meaning that
# each ORDER_PROC_KEY.x should be unique. Within each order the first row
# after sorting is kept.
# NOTE(review): the sort uses the SIGNED difference, so the retained partner is
# the one with the most negative difference, not necessarily the one closest
# in time - confirm this is intended.
unique.x.df <-
cutoff.df%>%
dplyr::arrange(ORDER_PROC_KEY.x, COLL_TIME_DIFF_MIN) %>%
dplyr::group_by(ORDER_PROC_KEY.x) %>%
dplyr::summarize(
ORDER_PROC_KEY.y = first( ORDER_PROC_KEY.y ),
DEPT = first( DEPT ),
COLLECTED_DT.x = first( COLLECTED_DT.x ),
RESULT_DT.x = first( RESULT_DT.x ),
NUM_VAL.x = first( NUM_VAL.x ),
COLLECTED_DT.y = first( COLLECTED_DT.y ),
RESULT_DT.y = first( RESULT_DT.y ),
NUM_VAL.y = first( NUM_VAL.y ),
COLL_TIME_DIFF_MIN = first( COLL_TIME_DIFF_MIN ),
AGE_PROC = first( AGE_PROC ),
ENC_KEY = first( ENC_KEY ),
PAT_KEY = first( PAT_KEY )
) %>%
dplyr::ungroup()
cat(sprintf('Number of non-duplicated first PROC_NAME rows: %d\n',
nrow(unique.x.df)))
# Similarly, ensure that each second PROC_NAME order is being used just once
# (i.e., that ORDER_PROC_KEY.y is not duplicated); the same signed-difference
# ordering decides which row is kept.
non.dup.df <-
unique.x.df %>%
dplyr::arrange(ORDER_PROC_KEY.y, COLL_TIME_DIFF_MIN) %>%
dplyr::group_by(ORDER_PROC_KEY.y) %>%
dplyr::summarize(
ORDER_PROC_KEY.x = first( ORDER_PROC_KEY.x ),
DEPT = first( DEPT ),
COLLECTED_DT.x = first( COLLECTED_DT.x ),
RESULT_DT.x = first( RESULT_DT.x ),
NUM_VAL.x = first( NUM_VAL.x ),
COLLECTED_DT.y = first( COLLECTED_DT.y ),
RESULT_DT.y = first( RESULT_DT.y ),
NUM_VAL.y = first( NUM_VAL.y ),
COLL_TIME_DIFF_MIN = first( COLL_TIME_DIFF_MIN ),
AGE_PROC = first( AGE_PROC ),
ENC_KEY = first( ENC_KEY ),
PAT_KEY = first( PAT_KEY )
) %>%
dplyr::ungroup()
cat(sprintf('Number of non-duplicated second PROC_NAME rows: %d\n',
nrow(non.dup.df)))
# Optionally keep only one pair per patient: the pair with the earliest
# PN[1] collection time
if (!multi.per.pt) {
per.pt.df <-
non.dup.df %>%
# Sort by PAT_KEY and the first COLLECTED DT
dplyr::arrange(PAT_KEY, COLLECTED_DT.x) %>%
# Group by PAT_KEY and add a "LINE" number
dplyr::group_by(PAT_KEY) %>%
dplyr::mutate(
PAT_LINE = row_number()
) %>%
# Ungroup
dplyr::ungroup() %>%
# Filter for lines == 1 only
dplyr::filter(PAT_LINE == 1) %>%
dplyr::select(-PAT_LINE)
} else {
per.pt.df <- non.dup.df
}
cat(sprintf('Number of paired, simultaneous values: %d\n',
nrow(per.pt.df)))
# Sanity check on the de-duplication: this count is expected to be zero
cat(sprintf('Number of duplicated ORDER_PROC_KEY.x values: %d\n',
sum(duplicated(per.pt.df$ORDER_PROC_KEY.x))))
return(per.pt.df)
}
First we create the CBC - BG dataset using the primary cutoff value, and include all pairs per patient.
# Primary CBC / BG pairing of Hgb values: primary time cutoff, and every
# qualifying pair per patient is retained
cbc.bg <-
  createPairedDataset(
    labs.df,
    cohort.df,
    PN = c('CBC', 'BG'),
    time.diff = primary.cutoff,
    CN = 'Hgb',
    multi.per.pt = TRUE
  )
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 72997
## Number of non-duplicated first PROC_NAME rows: 67141
## Number of non-duplicated second PROC_NAME rows: 67077
## Number of paired, simultaneous values: 67077
## Number of duplicated ORDER_PROC_KEY.x values: 0
In this section we complete the clinical accuracy assessments.
We begin the assessment of clinical accuracy by creating the Error Grid.
#'
#' @title Calculate Error Grid
#'
#' @description Counts the pairs falling in each area of the Error Grid,
#' prints the counts, and optionally plots and/or returns the grid
#'
#' @details The grid is built from four polygons: A (green), B (the full
#' 0-25 square), C and D (red). A pair is located with `ptinpoly::pip2d()`
#' using `NUM_VAL.x` as the x coordinate and `NUM_VAL.y` as the y coordinate;
#' a result `>= 0` is counted as belonging to the polygon.
#' NOTE(review): this presumably means "inside or on the boundary" - confirm
#' against the `pip2d()` return codes.
#'
#' The plot is built whenever it is needed for printing OR for returning, so
#' every combination of `to.plot` / `to.return` works.
#'
#' @param df The paired samples data frame, calculated above; must contain
#' the columns `NUM_VAL.x` and `NUM_VAL.y`
#' @param to.plot If TRUE [Default], displays the Error Grid plot
#' @param to.return If TRUE [Default], returns the plot
#'
#' @returns The ggplot object when `to.return` is TRUE, otherwise an
#' invisible NULL
#'
calculateErrorGrid <- function (df, to.plot = TRUE, to.return = TRUE) {
  #'
  #' Sub-function to define the underlying grid pts
  #'
  makeBaseGrid <- function () {
    # Define the points which comprise the Error Grid
    A <- data.frame(
      X = c(0, 6, 6, 10, 25, 25, 9, 9, 5.4, 0),
      Y = c(0, 0, 5.4, 9, 9, 25, 25, 10, 6, 6))
    B <- data.frame(
      X = c(0, 25, 25, 0),
      Y = c(0, 0, 25, 25))
    C <- data.frame(
      X = c(0, 6, 6, 0),
      Y = c(10, 10, 25, 25))
    D <- data.frame(
      X = c(10, 10, 25, 25),
      Y = c(0, 6, 6, 0))
    # Generate grid: identity line plus the four shaded polygons
    p <-
      ggplot() +
      geom_abline(mapping = NULL, data = NULL,
                  slope = 1, intercept = 0, na.rm = FALSE,
                  show.legend = NA, size = 1) +
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'green',
                   fill = 'green', alpha = 0.2, data = A) +
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'yellow',
                   fill = 'yellow', alpha = 0.1, data = B) +
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'red',
                   fill = 'red', alpha = 0.2, data = C) +
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'red',
                   fill = 'red', alpha = 0.2, data = D) +
      coord_cartesian(ylim = c(4, 20), xlim = c(4, 20)) +
      scale_fill_distiller(palette = 4, direction = 1)
    return(list(
      A = A, B = B, C = C, D = D, p = p
    ))
  } # End of sub-function

  g <- makeBaseGrid()
  Queries <- as.matrix(df %>% dplyr::select(NUM_VAL.x, NUM_VAL.y))

  # Count the pairs belonging to each polygon
  countInPolygon <- function (polygon.df) {
    sum(ptinpoly::pip2d(Vertices = as.matrix(polygon.df),
                        Queries = Queries) >= 0)
  }
  n.A <- countInPolygon(g$A)
  n.B <- countInPolygon(g$B)
  n.C <- countInPolygon(g$C)
  n.D <- countInPolygon(g$D)
  n.total <- nrow(Queries)

  # First display raw "Box" output
  cat(sprintf(paste0(
    'Counts by Box:\n',
    '\tBox A: %d (%0.1f %%)\n',
    '\tBox B: %d (%0.1f %%)\n',
    '\tBox C: %d (%0.1f %%)\n',
    '\tBox D: %d (%0.1f %%)\n'),
    n.A, n.A / n.total * 100.,
    n.B, n.B / n.total * 100.,
    n.C, n.C / n.total * 100.,
    n.D, n.D / n.total * 100.))

  # Now display by Green, Yellow, Red
  # Note that Green = A, Yellow = B - A - C - D, Red = C + D
  n.yellow <- n.B - n.A - n.C - n.D
  n.red <- n.C + n.D
  cat(sprintf(paste0(
    'Counts by Area:\n',
    '\tGreen Area: %d (%0.2f %%)\n',
    '\tYellow Area: %d (%0.2f %%)\n',
    '\tRed Area: %d (%0.2f %%)\n'),
    n.A, n.A / n.total * 100.,
    n.yellow, n.yellow / n.total * 100.,
    n.red, n.red / n.total * 100.
  ))

  # Nothing further to do when the plot is neither shown nor returned
  if (!(to.plot || to.return)) {
    return(invisible(NULL))
  }

  # Build the plot once; it is needed for printing, for returning, or both.
  # (Previously it was only built when BOTH flags were TRUE, so requesting
  # just one of them failed with "object 'Error_Grid' not found".)
  Error_Grid <-
    g$p +
    geom_jitter(aes(x = NUM_VAL.x, y = NUM_VAL.y), data = df,
                width = 0.3, height = 0.3, size = 0.3) +
    xlab('Reference Lab Value') +
    ylab('Measured Lab Value') +
    theme_bw() +
    theme(panel.grid.minor = element_blank()) +
    theme(panel.background = element_rect(fill = "transparent", colour = NA),
          plot.background = element_rect(fill = "transparent", colour = NA))

  if (to.plot) {
    print(Error_Grid)
  }
  if (to.return) {
    return(Error_Grid)
  }
  invisible(NULL)
}
Now we use this function to calculate Error Grid counts and display plot:
# Print the Error Grid counts for the CBC / BG pairs, show the plot, and keep
# the plot object
error.grid.cbc.bg <-
  calculateErrorGrid(cbc.bg, to.plot = TRUE, to.return = TRUE)
## Counts by Box:
## Box A: 63567 (94.8 %)
## Box B: 67077 (100.0 %)
## Box C: 42 (0.1 %)
## Box D: 29 (0.0 %)
## Counts by Area:
## Green Area: 63567 (94.77 %)
## Yellow Area: 3439 (5.13 %)
## Red Area: 71 (0.11 %)
To complete regression analysis, we must first gather the other variables from the original labs.df data frame. Then we set a “well-matched” threshold and run the regression model.
To do this properly on the UR system, which utilizes both “blood gas” and “blood gas panel” orders for different components of the blood gas, we need to join on ENC_KEY and by COLLECTED_DT because ORDER_PROC_KEY does NOT generate the full match.
#'
#' @title Gather Covariates
#'
#' @description Builds one row per pair with one column per covariate lab
#'
#' @details The input pairs must be unique on `ORDER_PROC_KEY.x` (guaranteed
#' when they come from `createPairedDataset()`) and unique on the tuple
#' (`COLLECTED_DT.y`, `ENC_KEY`); the function stops otherwise.
#'
#' Covariates are located by encounter and by the PN[2] collection time
#' (`COLLECTED_DT.y`) rather than by order key. When several results of the
#' same component match a pair, the one with the earliest `RESULT_DT` is
#' used. Pairs without a match for a component get `NA` in that column.
#'
#' @param paired.df The paired data frame; must contain (at least) the
#' columns `ORDER_PROC_KEY.x`, `COLLECTED_DT.y` and `ENC_KEY`
#' @param labs.df The full labs data frame
#' @param covars Character vector of `COMP_NAME` values to extract
#'
#' @returns A data frame keyed by `ORDER_PROC_KEY.x` with one column per
#' covariate; each newly added covariate is placed directly after the key,
#' so the covariate columns appear in reverse order of `covars`
#'
gatherCovariates <- function (paired.df, labs.df,
                              covars = c('pH', 'Bicarb', 'iCal', 'Gluc', 'Lactate')) {
  # Guard 1: every PN[1] order key occurs exactly once
  if (anyDuplicated(paired.df$ORDER_PROC_KEY.x) > 0)
    stop('PN[1] Order Proc Keys should be unique')

  # Guard 2: (PN[2] collection time, encounter) identifies a single pair
  distinct.keys <-
    dplyr::distinct(dplyr::select(paired.df, COLLECTED_DT.y, ENC_KEY))
  if (nrow(distinct.keys) != nrow(paired.df))
    stop('COLLECTED_DT.y and ENC_KEY tuple are not distinct in paired data frame')

  # Labs with a usable numeric value (drops NA and the 9999999 placeholder)
  usable.labs <-
    dplyr::filter(labs.df, !is.na(NUM_VAL) & NUM_VAL != 9999999.)

  # The result starts as the bare list of PN[1] order keys
  result.df <- data.frame(
    ORDER_PROC_KEY.x = paired.df$ORDER_PROC_KEY.x
  )
  cat(sprintf('NUmber of unique PN[1] order procedure keys: %d\n',
              nrow(result.df)))

  # All lab rows sharing the encounter and the PN[2] collection time of a pair
  pair.keys <-
    dplyr::select(paired.df, ORDER_PROC_KEY.x, ENC_KEY, COLLECTED_DT.y)
  matched.labs <-
    dplyr::inner_join(
      x = pair.keys,
      y = usable.labs,
      by = c('ENC_KEY', 'COLLECTED_DT.y' = 'COLLECTED_DT')
    )

  # Attach the covariates one at a time
  for (covar.name in covars) {
    # Earliest-resulted value of this component for each pair
    one.covar.df <-
      matched.labs %>%
      dplyr::filter(COMP_NAME == covar.name) %>%
      dplyr::select(ORDER_PROC_KEY.x, NUM_VAL, RESULT_DT) %>%
      dplyr::arrange(ORDER_PROC_KEY.x, RESULT_DT) %>%
      dplyr::group_by(ORDER_PROC_KEY.x) %>%
      dplyr::summarize(
        LAST_ADD = dplyr::first(NUM_VAL)
      ) %>%
      dplyr::ungroup()

    # The right join keeps every pair, including those with no value
    result.df <-
      dplyr::right_join(
        x = one.covar.df,
        y = result.df,
        by = c('ORDER_PROC_KEY.x')
      )

    # Rename the placeholder column to the covariate's own name
    names(result.df)[names(result.df) == 'LAST_ADD'] <- covar.name
  }

  return(result.df)
}
Here we utilize the gatherCovariates() function to extract covariates for our paired dataset:
# Collect the default covariates for each CBC / BG pair
covars.df <- gatherCovariates(paired.df = cbc.bg, labs.df = labs.df)
## NUmber of unique PN[1] order procedure keys: 67077
Now we display stats on the covariates, to ensure we have appropriately filled the table and to show the distributions. To do this, we write a function which iterates across the columns:
#'
#' @title Display Covariate Stats
#'
#' @description Displays statistics on covariates in the data frame
#'
#' @details The first column is treated as the key and skipped. For every
#' other column the function prints a summary, prints the count and
#' percentage of NAs, and plots a histogram of the values lying between the
#' 1st and 99th percentiles (at most 40 bins). A column with no non-missing
#' values is reported and its histogram skipped.
#'
#' @param covars.df The Covariates data frame from `gatherCovariates()` function
#'
#' @returns NULL, invisibly; called for its printed output and plots
#'
displayCovariateStats <- function (covars.df) {
  # `seq_len(...)[-1]` is empty when there is only the key column, whereas
  # `2 : ncol(...)` would iterate over c(2, 1)
  for (index in seq_len(ncol(covars.df))[-1]) {
    col.name <- names(covars.df)[index]
    # `[[` always yields the plain vector, for data frames and tibbles alike
    this.vec <- covars.df[[index]]

    # Summarise as a one-column table so that the printed layout (including
    # the column name) does not depend on the class of `covars.df`
    print(summary(covars.df[, index, drop = FALSE]))

    n.missing <- sum(is.na(this.vec))
    cat(sprintf('Count (and %%) of NAs in %s column: %d (%0.2f %%)\n',
                col.name,
                n.missing,
                n.missing / nrow(covars.df) * 100.))

    # Without any observed value there are no percentiles and no bins
    if (n.missing == length(this.vec)) {
      cat(sprintf('No non-missing values in %s column - histogram skipped\n',
                  col.name))
      next
    }

    # Restrict the histogram to the 1st - 99th percentile range
    bounds <- quantile(this.vec, probs = c(0.01, 0.99), na.rm = TRUE)
    this.filt.df <-
      data.frame(val = this.vec) %>%
      dplyr::filter(val >= bounds[1] & val <= bounds[2])

    # Never ask for more bins than there are distinct values
    hist.bins <- min(
      length(unique(this.filt.df$val)),
      40)

    p <-
      this.filt.df %>%
      ggplot() +
      geom_histogram(aes(x = val), bins = hist.bins) +
      xlab(paste0(col.name, ' values (1st - 99th percentile)')) +
      ylab('Count') +
      theme_bw()
    print(p)
  }
  invisible(NULL)
}
displayCovariateStats(covars.df)
## Lactate
## Min. : 0.500
## 1st Qu.: 1.100
## Median : 1.500
## Mean : 2.071
## 3rd Qu.: 2.200
## Max. :27.200
## NA's :17366
## Count (and %) of NAs in Lactate column: 17366 (25.89 %)
## Gluc
## Min. : 13.0
## 1st Qu.: 97.0
## Median : 115.0
## Mean : 127.7
## 3rd Qu.: 141.0
## Max. :3008.0
## NA's :5903
## Count (and %) of NAs in Gluc column: 5903 (8.80 %)
## iCal
## Min. :0.270
## 1st Qu.:1.110
## Median :1.170
## Mean :1.177
## 3rd Qu.:1.230
## Max. :2.740
## NA's :5841
## Count (and %) of NAs in iCal column: 5841 (8.71 %)
## Bicarb
## Min. : 1.80
## 1st Qu.:23.90
## Median :27.30
## Mean :28.08
## 3rd Qu.:31.60
## Max. :82.60
## NA's :5
## Count (and %) of NAs in Bicarb column: 5 (0.01 %)
## pH
## Min. : 6.605
## 1st Qu.: 7.353
## Median : 7.398
## Mean : 7.393
## 3rd Qu.: 7.438
## Max. :37.600
## NA's :4
## Count (and %) of NAs in pH column: 4 (0.01 %)
Now we join the covariates to the paired dataframe and add the age, filter and impute. Then we run the logistic regression and display the results:
#'
#' @title Join Impute Regress
#'
#' @description Join covars and pairs, impute NA values, regress, and report results
#'
#' @details Steps:
#' 1. Left-join the covariates onto the pairs by `ORDER_PROC_KEY.x`.
#' 2. Replace missing pH, Gluc, iCal, Lactate and Bicarb values with
#' `impute.fx()` of the observed values of the same column.
#' 3. Flag each pair as `WELL_MATCHED` using `thresh.list` (see below).
#' 4. Fit a binomial `glm()` of `WELL_MATCHED` on the PN[2] value, the
#' covariates, `AGE_PROC` and `DEPT`. pH is multiplied by 10 for the fit,
#' so its coefficient is per 0.1 pH unit.
#' 5. Print the model summary, the exponentiated coefficients and, when
#' requested, the exponentiated confidence intervals.
#'
#' @param paired.df A dataframe of paired PN[1] and PN[2] values, created from
#' the function `createPairedDataset()`
#' @param covars.df A dataframe of covariates, created from the
#' function `gatherCovariates()`
#' @param thresh.list A list of three-element vectors, where each three-element
#' vector is of the format c(min, max, thresh). A pair is `WELL_MATCHED`
#' when, for at least one vector, its mean Hgb (between PN[1] and PN[2])
#' satisfies `min <= mean < max` and its absolute difference is less than
#' `thresh`. The bands are half-open so that adjacent bands sharing an edge
#' (e.g. `c(-100, 6, .)` and `c(6, 9, .)`) cover the edge value exactly
#' once; with strict inequalities on both sides a mean of exactly 6 would
#' belong to no band and could never be well matched.
#' @param impute.fx The function for imputing NA values [Default: `median`];
#' it is called as `impute.fx(x, na.rm = TRUE)`
#' @param ci If TRUE, compute the confidence intervals on the regression results
#' [Default: TRUE]
#'
#' @returns A list whose first element (`reg.model`) is the fitted model and,
#' when `ci` is TRUE, whose second element (`ci.reg`) holds the confidence
#' intervals on the coefficient (log-odds) scale
#'
joinImputeRegress <- function (paired.df, covars.df, thresh.list,
                               impute.fx = median, ci = TRUE) {
  # Validate the threshold bands before doing any work
  if (any(lengths(thresh.list) != 3))
    stop('Each vector within the threshold list must be three elements')

  # First we join the paired data frame (as the basis) with the covariates,
  # by the unique ORDER PROC key of PN[1] (should be CBC)
  joined.df <-
    dplyr::left_join(
      x = paired.df,
      y = covars.df,
      by = c('ORDER_PROC_KEY.x')
    ) %>%
    dplyr::select(NUM_VAL.x, NUM_VAL.y, AGE_PROC,
                  pH, Bicarb, iCal, Gluc, Lactate, DEPT)

  # Replace NA values with the imputed value of the same column
  fillMissing <- function (x) {
    ifelse(is.na(x), impute.fx(x, na.rm = TRUE), x)
  }
  impute.df <-
    joined.df %>%
    dplyr::mutate(
      pH = fillMissing(pH),
      Gluc = fillMissing(Gluc),
      iCal = fillMissing(iCal),
      Lactate = fillMissing(Lactate),
      Bicarb = fillMissing(Bicarb)
    )

  # Quantities used to decide whether a pair is "WELL_MATCHED"
  thresh.df <-
    impute.df %>%
    dplyr::mutate(
      MEAN_HGB = (NUM_VAL.x + NUM_VAL.y) / 2.,
      DIFF_HGB = abs(NUM_VAL.x - NUM_VAL.y),
      WELL_MATCHED = FALSE # Default to FALSE
    )

  # OR together the bands: half-open [min, max) on the mean, strict on the
  # difference. (`band` rather than `t`, which would shadow base::t.)
  for (band in thresh.list) {
    in.band <-
      thresh.df$MEAN_HGB >= band[1] & thresh.df$MEAN_HGB < band[2]
    thresh.df$WELL_MATCHED <-
      thresh.df$WELL_MATCHED | (in.band & thresh.df$DIFF_HGB < band[3])
  }

  # Remove the MEAN and DIFF variables, as well as PN[1] value
  thresh.df <-
    thresh.df %>%
    dplyr::select(-MEAN_HGB, -DIFF_HGB, -NUM_VAL.x)
  cat(sprintf('Number (%%) of `WELL MATCHED`: %d (%0.2f %%)\n',
              sum(thresh.df$WELL_MATCHED),
              sum(thresh.df$WELL_MATCHED) / nrow(thresh.df) * 100.))

  # Run the logistic regression
  reg.model <- glm(
    WELL_MATCHED ~ NUM_VAL.y +
      pH +
      Gluc +
      Bicarb +
      iCal +
      Lactate +
      AGE_PROC +
      DEPT,
    family = 'binomial',
    data = thresh.df %>%
      dplyr::mutate(pH = pH * 10.)
  )
  print(summary(reg.model))
  # Exponentiated coefficients, i.e. odds ratios
  print(exp(reg.model$coefficients))

  if (!ci) {
    return(list(reg.model = reg.model))
  }

  ci.reg <- confint(reg.model)
  print(exp(ci.reg))
  # Elements are named for convenience; positional access ([[1]], [[2]])
  # keeps working as before
  return(list(
    reg.model = reg.model,
    ci.reg = ci.reg
  ))
}
Now run the joining, imputing, and regression:
# Each element of the threshold list is a three-element vector
# c(min, max, threshold): for pairs whose mean Hgb lies between `min` and
# `max`, the absolute Hgb difference must be below `threshold` for the pair to
# count as `WELL_MATCHED`
thresh.list <- list(
  c(-100, 6, 1.5),
  c(6, 9, 1.0),
  c(9, 100, 1.5)
)

# Join, impute with the median, fit the model and compute the CIs
regress.res <- joinImputeRegress(
  cbc.bg,
  covars.df,
  thresh.list,
  impute.fx = median,
  ci = TRUE
)
## Number (%) of `WELL MATCHED`: 62131 (92.63 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3293 0.2559 0.3995 0.4421 1.1141
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.526e+00 5.240e-01 6.729 1.71e-11 ***
## NUM_VAL.y 3.478e-02 7.502e-03 4.637 3.54e-06 ***
## pH -7.233e-03 6.342e-03 -1.140 0.254
## Gluc -5.059e-04 2.438e-04 -2.075 0.038 *
## Bicarb 2.274e-03 2.332e-03 0.975 0.329
## iCal -1.141e-01 1.494e-01 -0.764 0.445
## Lactate 7.891e-02 1.095e-02 7.208 5.67e-13 ***
## AGE_PROC -1.003e-04 7.173e-06 -13.988 < 2e-16 ***
## DEPTPICU -8.797e-01 4.335e-02 -20.295 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 35309 on 67076 degrees of freedom
## Residual deviance: 34047 on 67068 degrees of freedom
## AIC: 34065
##
## Number of Fisher Scoring iterations: 6
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 33.9810602 1.0353945 0.9927930 0.9994943 1.0022771 0.8921726
## Lactate AGE_PROC DEPTPICU
## 1.0821103 0.9998997 0.4149064
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 9.5488867 111.6783127
## NUM_VAL.y 1.0203131 1.0507622
## pH 0.9780113 1.0089308
## Gluc 0.9990279 0.9999864
## Bicarb 0.9977260 1.0068891
## iCal 0.6666270 1.1972400
## Lactate 1.0597008 1.1061743
## AGE_PROC 0.9998856 0.9999137
## DEPTPICU 0.3809174 0.4514699
Cohen’s kappa coefficient assesses the agreement between two “raters” or, equivalently, how well two classifications match. In our case, we would like to understand the agreement between the hemoglobin values from two PROC_NAMEs (e.g. CBC and BG) when a simple threshold is applied.
We hypothesize that there will be agreement at the tails but that there will be some disagreement in the local vicinity of the threshold, which will likely drive down the Kappa coefficient.
First, here is the function to compute the Cohen’s Kappa:
#'
#' @title Calculate Cohen Kappa
#'
#' @description Calculates the Cohen Kappa statistic for two vectors of CN values
#'
#' @details
#' Recall that Cohen's Kappa is defined as:
#'
#' K = (P_observed - P_expected) / (1 - P_expected)
#'
#' Where:
#' P_expected = P_pos + P_neg with
#' P_pos = P_raterA+ x P_raterB+ and P_neg = P_raterA- x P_raterB-
#'
#' In our case, Rater A will be positive when values.x (from PN[1]) are less
#' than the cutoff, suggesting the need for a transfusion. Similarly,
#' Rater B will be positive when values.y (from PN[2]) are less than the
#' cutoff.
#'
#' A "Positive" response (meaning we have to transfuse) is when Hgb < cutoff,
#' and a "Negative" response (meaning we do not transfuse) is when Hgb >= cutoff.
#'
#' @param values.x A column vector of Hgb values from PN[1]
#' @param values.y A column vector of Hgb values from PN[2] (with length
#' same as values.x)
#' @param cutoff A scalar representing the Hgb cutoff value; it does not need
#' to be a whole number
#' @param to.print If TRUE, prints results in addition to returning [Default]
#'
#' @return The Cohen Kappa for these two vectors at the cutoff given
#'
calculateCohenKappa <- function (values.x, values.y,
                                 cutoff = 7.0, to.print = TRUE) {
  # Verify that the lengths of the two vectors of PN values are identical
  if (length(values.x) != length(values.y))
    stop('Error: vectors for X and Y must be of equal lengths')
  n.pairs <- length(values.x)

  if (to.print) {
    cat(sprintf('Pre-Range Check Length: %d\n', n.pairs))
    # `%g`, not `%d`: sprintf() refuses `%d` for a double that is not a whole
    # number, so a cutoff such as 7.5 used to raise an error here. `%g` still
    # prints whole-number cutoffs without decimals (7 -> "7").
    cat(sprintf('Cutoff value used: %g\n', cutoff))
  }

  # Consider a 2x2 matrix with two "Raters" (or two vectors):
  #
  #                        values.x
  #                    Yes   |   No
  #                 -------------------
  #            Yes  |   A    |   B    |
  # values.y    ----|--------|--------|
  #            No   |   C    |   D    |
  #                 -------------------
  #
  # where: len = A + B + C + D
  #
  x.pos <- values.x < cutoff
  y.pos <- values.y < cutoff
  P.x.pos <- sum(x.pos) / n.pairs   # Equiv to 'A + C' / len
  P.x.neg <- sum(!x.pos) / n.pairs  # Equiv to 'B + D' / len
  P.y.pos <- sum(y.pos) / n.pairs   # Equiv to 'A + B' / len
  P.y.neg <- sum(!y.pos) / n.pairs  # Equiv to 'C + D' / len
  if (to.print) {
    cat(sprintf('X :: Pos: %0.2f\tNeg: %0.2f\n', P.x.pos, P.x.neg))
    cat(sprintf('Y :: Pos: %0.2f\tNeg: %0.2f\n', P.y.pos, P.y.neg))
  }

  # Agreement expected by chance: both positive, or both negative
  P.pos <- P.x.pos * P.y.pos
  P.neg <- P.x.neg * P.y.neg
  if (to.print)
    cat(sprintf('\tP.pos: %0.4f\n\tP.neg: %0.4f\n', P.pos, P.neg))
  P.exp <- P.pos + P.neg

  # Observed agreement: pairs where both values are < cutoff plus pairs where
  # both values are >= cutoff, divided by the number of pairs
  P.obs <- ( sum(x.pos & y.pos) + sum(!x.pos & !y.pos) ) / n.pairs
  if (to.print)
    cat(sprintf('\tP.obs: %0.2f\n\tP.exp: %0.2f\n', P.obs, P.exp))

  kappa <- (P.obs - P.exp) / (1. - P.exp)
  if (to.print)
    cat(sprintf('Kappa: %0.2f\n', kappa))
  return(kappa)
}
Now we use this function to calculate the Cohen’s kappa at a given primary threshold:
# Kappa between the CBC and BG Hgb values at the primary Hgb cutoff
calculateCohenKappa(
  cbc.bg$NUM_VAL.x,
  cbc.bg$NUM_VAL.y,
  cutoff = primary.hgb.cutoff,
  to.print = TRUE
)
## Pre-Range Check Length: 67077
## Cutoff value used: 7
## X :: Pos: 0.02 Neg: 0.98
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0001
## P.neg: 0.9747
## P.obs: 0.99
## P.exp: 0.97
## Kappa: 0.51
## [1] 0.511274
And run again across the secondary thresholds:
# Repeat the kappa calculation at each sensitivity Hgb cutoff; the results are
# printed by the function and its return value is not kept
for (thresh in sens.hgb.cutoffs) {
  calculateCohenKappa(
    cbc.bg$NUM_VAL.x,
    cbc.bg$NUM_VAL.y,
    cutoff = thresh,
    to.print = TRUE
  )
}
## Pre-Range Check Length: 67077
## Cutoff value used: 5
## X :: Pos: 0.00 Neg: 1.00
## Y :: Pos: 0.00 Neg: 1.00
## P.pos: 0.0000
## P.neg: 0.9983
## P.obs: 1.00
## P.exp: 1.00
## Kappa: 0.39
## Pre-Range Check Length: 67077
## Cutoff value used: 9
## X :: Pos: 0.20 Neg: 0.80
## Y :: Pos: 0.12 Neg: 0.88
## P.pos: 0.0250
## P.neg: 0.7000
## P.obs: 0.91
## P.exp: 0.72
## Kappa: 0.67
# Remove the loop variable so it does not linger in the session
rm(thresh)
In this section, we ask the question, “If the BG Hgb value is greater than X, what is the likelihood that the CBC Hgb is less than Y?” for an appropriate transfusion threshold Y.
Similarly, we can ask the question, “If the BG Hgb value is less than X, what is the likelihood that the CBC Hgb is greater than Y?” for the same set of transfusion thresholds.
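First we create the function to calculate the 2x2 matrix for this test. The “gold standard” is the PN[1] value, most often CBC. The “test” is the PN[2] value, either BG or iSTAT. In words: a “positive” is a Hgb value below the cutoff (a transfusion is indicated), and a “negative” is a Hgb value at or above the cutoff.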
In this way, the “sensitivity” is the percent of actually anemic patients identified correctly by the test. The PPV is the percent of patients identified by the test as anemic who are actually anemic (and is dependent on the incidence of anemia in our population).
#'
#' @title Transfusion Confusion Matrix
#'
#' @description Builds the 2x2 confusion matrix of a "test" Hgb against a
#' "gold standard" Hgb for one pair of cutoffs
#'
#' @details A value is "positive" (transfusion indicated) when it is below
#' its cutoff and "negative" when it is at or above it. The gold standard
#' is `value.x` judged against `cutoffs[1]`; the test is `value.y` judged
#' against `cutoffs[2]`.
#'
#' @param value.x The value of PN[1] elements (typically CBC)
#' @param value.y The value of PN[2] elements (typically BG or iSTAT)
#' @param cutoffs A two-element vector: the cutoff for PN[1] (the gold
#' standard) followed by the cutoff for PN[2] (the test)
#' @param to.print If TRUE, prints results [Default]
#' @param to.return If TRUE, returns results [Default]
#'
#' @returns When `to.return` is TRUE, a list with `cutoffs`, the four cell
#' counts (`TP`, `FP`, `TN`, `FN`), `sens`, `spec`, `ppv`, `npv`,
#' `falseOR` (1 - NPV) and `nnm` (1 / falseOR); otherwise NULL, invisibly
#'
transfusionConfusionMatrix <- function (value.x, value.y,
                                        cutoffs = c(7., 7.),
                                        to.print = TRUE,
                                        to.return = TRUE) {
  # Both vectors must pair up, and exactly two cutoffs are needed
  stopifnot(length(value.x) == length(value.y),
            length(cutoffs) == 2)

  n.rows <- length(value.x)
  if (to.print)
    cat(sprintf('Total number of input rows: %d\n', n.rows))

  # 2x2 Standard Table:
  #
  #                      Gold Standard (PN[1])
  #                            value.x
  #                        Pos    |    Neg
  #                   ----------------------
  #              Pos  |    TP    |    FP    |
  # Test (PN[2])      |----------|----------|
  #  value.y     Neg  |    FN    |    TN    |
  #                   ----------------------
  #
  # "Pos" reflects the need for a transfusion: the value is < its cutoff
  gold.pos <- value.x < cutoffs[1]
  gold.neg <- value.x >= cutoffs[1]
  test.pos <- value.y < cutoffs[2]
  test.neg <- value.y >= cutoffs[2]

  if (to.print)
    cat(sprintf('Gold Standard:\n\tPositive: %d (%0.2f %%)\n\tNegative: %d (%0.2f %%)\n',
                sum(gold.pos),
                sum(gold.pos) / n.rows * 100.,
                sum(gold.neg),
                sum(gold.neg) / n.rows * 100.))

  # Gold standard positive, test positive
  TP <- sum(gold.pos & test.pos)
  # Gold standard negative, test positive
  FP <- sum(gold.neg & test.pos)
  # Gold standard positive, test negative
  FN <- sum(gold.pos & test.neg)
  # Gold standard negative, test negative
  TN <- sum(gold.neg & test.neg)

  # Sanity check: the four cells must account for every row
  stopifnot(TP+FP+TN+FN == length(value.x))

  sens <- TP / (TP + FN)
  spec <- TN / (TN + FP)
  ppv <- TP / (TP + FP)
  npv <- TN / (FN + TN)
  # False omission rate and its reciprocal
  false.or <- 1. - npv
  nnm <- 1. / false.or

  if (to.print)
    cat(sprintf(paste0(
      'Cutoffs: PN[1]: %0.1f\tPN[2]: %0.1f\n',
      'TP: %d (%0.4f %%)\t',
      'FP: %d (%0.4f %%)\n',
      'FN: %d (%0.4f %%)\t',
      'TN: %d (%0.4f %%)\n',
      'Sens: %0.4f\n',
      'Spec: %0.4f\n',
      'PPV: %0.4f\n',
      'NPV: %0.4f\n',
      'FOR (1-NPV): %0.4f\n',
      'NNM (1/FOR): %0.4f\n\n'),
      cutoffs[1], cutoffs[2],
      TP, TP / n.rows * 100.,
      FP, FP / n.rows * 100.,
      FN, FN / n.rows * 100.,
      TN, TN / n.rows * 100.,
      sens, spec, ppv, npv, false.or, nnm
    ))

  if (to.return) {
    return(list(
      cutoffs = cutoffs,
      TP = TP, FP = FP, TN = TN, FN = FN,
      sens = sens, spec = spec, ppv = ppv, npv = npv,
      falseOR = false.or,
      nnm = nnm
    ))
  }
}
We can run this across a few standard values:
# Confusion matrices at the primary CBC cutoff for a handful of test (PN[2])
# cutoffs; each list element holds the result together with its test cutoff
mat.across.cutoffs <- list()
for (pn2.cutoff in c(7.0, 7.5, 8.0, 8.5, 9.0)) {
  res <- transfusionConfusionMatrix(
    cbc.bg$NUM_VAL.x,
    cbc.bg$NUM_VAL.y,
    cutoffs = c(primary.hgb.cutoff, pn2.cutoff),
    to.print = TRUE,
    to.return = TRUE
  )
  # Add this cutoff's results as one new (nested) list element
  mat.across.cutoffs[[length(mat.across.cutoffs) + 1]] <-
    list(res = res, cutoff = pn2.cutoff)
}
## Total number of input rows: 67077
## Gold Standard:
## Positive: 1159 (1.73 %)
## Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 441 (0.6575 %) FP: 107 (0.1595 %)
## FN: 718 (1.0704 %) TN: 65811 (98.1126 %)
## Sens: 0.3805
## Spec: 0.9984
## PPV: 0.8047
## NPV: 0.9892
## FOR (1-NPV): 0.0108
## NNM (1/FOR): 92.6588
##
## Total number of input rows: 67077
## Gold Standard:
## Positive: 1159 (1.73 %)
## Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 783 (1.1673 %) FP: 519 (0.7737 %)
## FN: 376 (0.5605 %) TN: 65399 (97.4984 %)
## Sens: 0.6756
## Spec: 0.9921
## PPV: 0.6014
## NPV: 0.9943
## FOR (1-NPV): 0.0057
## NNM (1/FOR): 174.9335
##
## Total number of input rows: 67077
## Gold Standard:
## Positive: 1159 (1.73 %)
## Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 925 (1.3790 %) FP: 1769 (2.6373 %)
## FN: 234 (0.3489 %) TN: 64149 (95.6349 %)
## Sens: 0.7981
## Spec: 0.9732
## PPV: 0.3434
## NPV: 0.9964
## FOR (1-NPV): 0.0036
## NNM (1/FOR): 275.1410
##
## Total number of input rows: 67077
## Gold Standard:
## Positive: 1159 (1.73 %)
## Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 993 (1.4804 %) FP: 4032 (6.0110 %)
## FN: 166 (0.2475 %) TN: 61886 (92.2611 %)
## Sens: 0.8568
## Spec: 0.9388
## PPV: 0.1976
## NPV: 0.9973
## FOR (1-NPV): 0.0027
## NNM (1/FOR): 373.8072
##
## Total number of input rows: 67077
## Gold Standard:
## Positive: 1159 (1.73 %)
## Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 1031 (1.5370 %) FP: 7332 (10.9307 %)
## FN: 128 (0.1908 %) TN: 58586 (87.3414 %)
## Sens: 0.8896
## Spec: 0.8888
## PPV: 0.1233
## NPV: 0.9978
## FOR (1-NPV): 0.0022
## NNM (1/FOR): 458.7031
# Remove the loop variable so it does not linger in the session
rm(pn2.cutoff)
The above function can be run across a range of cutoff values to generate both an ROC and a P-R curve, which give information about the “performance” of the test - either BG or iStat. We can calculate the AUROC and the optimal test thresholds (as the points closest to 0,1).
#'
#' @title Calculate Threshold ROC
#'
#' @description Calculates an ROC and P-R based on Transfusion "Test"
#'
#' @details This function makes use of the above `transfusionConfusionMatrix`
#' function, which returns a sensitivity and specificity at a given pair
#' of thresholds - the CBC threshold (PN[1]) and the PN[2] threshold
#' (either BG or ISTAT).
#'
#' By generating confusion matrices across a range of test PN[2]
#' thresholds (default from 0 g/dL to 25 g/dL, the full range of Hgb), at
#' both "low" and "high" CBC Hgb thresholds (5 g/dL and 7 g/dL), we can
#' generate ROC curves as well as P-R curves for these two conditions
#' (low, high). These curves represent the ability of the "test" values
#' (either BG or iStat) to discriminate the "true" condition of anemia
#' as defined by a given threshold (low, 5 or high, 7).
#'
#' We calculate the AUROC using trapezoidal (numeric) integration. We can
#' also identify the "optimal" threshold to use to maximize sensitivity
#' and specificity by minimizing the Euclidian distance to the point (0,1)
#' on the ROC curve.
#'
#' Similarly, on the P-R curve, we can visualize the tradeoff between
#' precision (PPV) and recall (sensitivity). In this situation,
#' precision refers to the % of test values below a threshold which
#' represent actual anemia (or actual TPs), and is dependent on the
#' incidence of anemia in the population. Recall (sensitivity) represents
#' the % of actually anemic patients which are identified by the "test" Hgb.
#'
#' @param paired.df The paired data frame containing `value.x` and `value.y`
#' @param to.print If TRUE, prints results [Default]
#' @param to.return If TRUE, returns results as a list of elements [Default]
#' @param cutoff.minmax A two-element vector of the minimum and maximum Hgb
#' values used to generate the full cutoff sequence [Default: 0., 25.]
#' @param cutoff.by The difference between successive values in the cutoff seq
#'
calculateThresholdROC <- function (paired.df,
                                   to.print = TRUE, to.return = TRUE,
                                   cutoff.minmax = c(0., 25.),
                                   cutoff.by = 0.1) {
  # Purpose: sweep the PN[2] ("test") Hgb threshold across a range, at two
  # fixed CBC (PN[1]) thresholds, and summarize the resulting ROC and
  # precision-recall curves.
  #
  # Returns (when `to.return` is TRUE) a list with elements:
  #   auroc      - one row per CBC cutoff with the trapezoidal AUROC
  #   opt.cutoff - one row per CBC cutoff with the test cutoff closest to (0,1)
  #   p.roc      - ggplot object of the ROC curves
  #   p.pr       - ggplot object of the precision-recall curves
  # When `to.return` is FALSE, NULL is returned invisibly.

  # Establish the sequence for iterating through the threshold calculation
  cutoff.range <- seq(
    from = cutoff.minmax[1],
    to = cutoff.minmax[2],
    by = cutoff.by
  )

  # The two "true anemia" definitions (CBC / PN[1] thresholds) and the labels
  # used for grouping and in the plot legends
  cbc.conditions <- list(
    list(cutoff = 7., label = 'High (7.0 g/dL)'),
    list(cutoff = 5., label = 'Low (5.0 g/dL)')
  )

  # Build one ROC / P-R row for a given CBC condition and test cutoff
  rocRow <- function (cond, test.cutoff) {
    res <-
      transfusionConfusionMatrix(
        value.x = paired.df$NUM_VAL.x,
        value.y = paired.df$NUM_VAL.y,
        cutoffs = c(cond$cutoff, test.cutoff),
        to.print = FALSE,
        to.return = TRUE
      )
    data.frame(
      tpr = res$sens,
      fpr = 1.0 - res$spec,
      precision = res$ppv,
      recall = res$sens,
      cbc.cutoff = cond$label,
      bg.cutoff = test.cutoff,
      stringsAsFactors = FALSE
    )
  }

  # Collect all rows first and bind once (rather than growing a data frame
  # with rbind inside a loop). Row order matches the original: for each test
  # cutoff in increasing order, the "high" row followed by the "low" row.
  roc.rows <- list()
  for (test.cutoff in cutoff.range) {
    for (cond in cbc.conditions) {
      roc.rows[[length(roc.rows) + 1L]] <- rocRow(cond, test.cutoff)
    }
  }
  roc.df <- dplyr::bind_rows(roc.rows)

  # Trapezoidal integration to determine AUROC values: each step contributes
  # its width in FPR times the MEAN of the TPR at its two end points. (Using
  # only the left-hand TPR would be a left Riemann sum, which underestimates
  # the area whenever TPR and FPR increase within the same step.)
  auroc <-
    roc.df %>%
    dplyr::group_by(cbc.cutoff) %>%
    dplyr::mutate(
      diff.fpr = dplyr::lead(fpr) - fpr,
      mean.tpr = (tpr + dplyr::lead(tpr)) / 2.
    ) %>%
    # The last row of each group has no successor, so its step is undefined
    dplyr::filter(!is.na(diff.fpr)) %>%
    dplyr::summarize(
      AUROC = sum(diff.fpr * mean.tpr)
    )

  # Display results of trapezoidal integration
  if (to.print) {
    print(
      knitr::kable(
        auroc,
        col.names = c('CBC Cutoff', 'AUROC'),
        digits = c(0, 3)
      ) %>%
        kableExtra::kable_paper("hover")
    )
  }

  # Identify the optimum cutoff as the point with the smallest Euclidean
  # distance to the ideal corner (FPR = 0, TPR = 1) of the ROC plot. Sorting
  # by distance and taking the first row per group keeps the lowest test
  # cutoff when several cutoffs tie.
  opt.cutoff <-
    roc.df %>%
    dplyr::mutate(
      dist = sqrt( (1. - tpr) ^ 2 + (fpr) ^ 2 )
    ) %>%
    dplyr::arrange(cbc.cutoff, dist) %>%
    dplyr::group_by(cbc.cutoff) %>%
    dplyr::summarize(
      DIST = dplyr::first(dist),
      CUTOFF = dplyr::first(bg.cutoff),
      SENS = dplyr::first(tpr),
      SPEC = 1. - dplyr::first(fpr)
    )

  if (to.print) {
    print(
      knitr::kable(
        opt.cutoff,
        col.names = c('CBC Cutoff', 'Distance', 'Cutoff', 'Sens', 'Spec'),
        digits = c(0, 3, 1, 3, 3)
      ) %>%
        kableExtra::kable_paper("hover")
    )
  }

  # Plot ROC curve (the dashed diagonal marks a non-discriminating test)
  p.roc <-
    roc.df %>%
    ggplot(aes(x = fpr, y = tpr, color = cbc.cutoff)) +
    geom_point(size = 2) +
    geom_line(size = 1.2) +
    annotate('segment', x = 0, xend = 1, y = 0, yend = 1, color = '#666666', linetype = 'dashed') +
    xlab('False positive rate (1 - spec)') +
    ylab('True positive rate (sens)') +
    labs(color = 'CBC Cutoff') +
    theme_bw() +
    theme(legend.position = c(.6,.3))

  if (to.print) {
    print(p.roc)
  }

  # Plot Precision Recall curve
  p.pr <-
    roc.df %>%
    ggplot(aes(x = recall, y = precision, color = cbc.cutoff)) +
    geom_point(size = 2) +
    geom_line(size = 1.2) +
    xlim(0,1) + ylim(0,1) +
    xlab('Recall (sens)') +
    ylab('Precision (ppv)') +
    labs(color = 'CBC Cutoff') +
    theme_bw() +
    theme(legend.position = c(.8,.9))

  if (to.print) {
    print(p.pr)
  }

  # The raw `roc.df` is not part of the returned list
  if (to.return) {
    return(list(
      auroc = auroc,
      opt.cutoff = opt.cutoff,
      p.roc = p.roc,
      p.pr = p.pr
    ))
  }

  invisible(NULL)
}
We do this for a standard range and save / print the results:
# ROC / P-R analysis for the CBC vs BG pairs, sweeping the test threshold in
# steps of 0.01; results are printed and kept for saving below
cbc.bg.thresh.roc <-
  calculateThresholdROC(
    cbc.bg,
    cutoff.by = 0.01,
    to.print = TRUE,
    to.return = TRUE
  )
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.945 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.811 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.151 </td>
## <td style="text-align:right;"> 8.7 </td>
## <td style="text-align:right;"> 0.878 </td>
## <td style="text-align:right;"> 0.911 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.335 </td>
## <td style="text-align:right;"> 8.8 </td>
## <td style="text-align:right;"> 0.685 </td>
## <td style="text-align:right;"> 0.887 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).
Now we save out all of the plots and calculations that we have completed:
# Write the primary CBC vs BG clinical-accuracy results to
# <DATA_PATH>/<SITE>_pri_cbc_bg_clinical_<run.date>.rData
save(
  list = c(
    'primary.cutoff',
    # 'cbc.bg' and 'covars.df' are currently excluded from the saved file
    'error.grid.cbc.bg',
    'thresh.list',
    'regress.res',
    'mat.across.cutoffs',
    'primary.hgb.cutoff',
    'cbc.bg.thresh.roc'
  ),
  file = file.path(
    Sys.getenv('PICU_LAB_DATA_PATH'),
    sprintf(
      '%s_pri_cbc_bg_clinical_%s.rData',
      Sys.getenv('PICU_LAB_SITE_NAME'),
      run.date
    )
  )
)
Here we re-do the clinical accuracy analyses for CBC vs POC (iStat) procedures. To do this, we first (again) define a function that runs all of the above work - similar to the one in 02_Analytic_Accuracy.Rmd, except that this function is named runAllClinical().
#'
#' @title Run All Clinical
#'
#' @description Runs through all clinical accuracy tasks, for sensitivity analysis
#'
#' @details In order, this function: builds the paired CBC vs `compare.PN`
#'   Hgb dataset, calculates the error grid, gathers and summarizes
#'   covariates, runs the "well-matched" regression, prints Cohen's Kappa at
#'   the primary and sensitivity Hgb cutoffs, prints transfusion confusion
#'   matrices across a fixed set of PN[2] cutoffs, computes the ROC / P-R
#'   curves, and (optionally) saves the results.
#'
#'   When saving, the object stored under the name `primary.cutoff` is the
#'   `time.diff` passed to this call, so the saved file always records the
#'   time cutoff that was actually used to build the pairs.
#'
#' @param labs.df The original labs data frame
#' @param cohort.df The original cohort data frame
#' @param compare.PN The comparison PROC name (e.g. either `BG` or `ISTAT`)
#' @param time.diff The cutoff time difference (in minutes) for determining
#'   whether labs are "simultaneous"
#' @param multi.per.pt If TRUE, allows all results from patients;
#'   If FALSE, only the first (chronological) result from a patient is included
#' @param primary.hgb.cutoff The primary Hgb cutoff to use for Cohen's Kappa
#' @param sens.hgb.cutoffs The secondary Hgb cutoffs for sensitivity analysis
#' @param run.date A string representation of date for saving (format: %Y-%m-%d)
#' @param save.fn The file name (which will be concatenated with SITE and run.date),
#'   or NA [Default] if we do not wish to save any results to a file
#'
#' @return NULL, invisibly; the function is called for its printed output,
#'   plots, and the optional saved file.
#'
runAllClinical <- function (labs.df, cohort.df, compare.PN,
                            time.diff, multi.per.pt, primary.hgb.cutoff,
                            sens.hgb.cutoffs, run.date, save.fn = NA) {

  # Generate the paired dataset
  paired.df <- createPairedDataset(
    labs.df = labs.df,
    cohort.df = cohort.df,
    PN = c('CBC', compare.PN),
    CN = 'Hgb',
    time.diff = time.diff,
    multi.per.pt = multi.per.pt
  )

  # Calculate and display the error grid
  error.grid <- calculateErrorGrid(
    df = paired.df,
    to.plot = TRUE,
    to.return = TRUE
  )

  # Gather covariates for this paired set
  covars.df <- gatherCovariates(paired.df, labs.df)

  # Display summaries of those covars
  displayCovariateStats(covars.df)

  # Threshold lists are a list of three-element vectors, with the three
  # elements corresponding to: min, max, threshold
  # This can be read as, between the min and max, the mean diff must be less
  # than the threshold, otherwise it is not `WELL_MATCHED`
  thresh.list <-
    list(
      c(-100, 6, 1.5),
      c(6, 9, 1.0),
      c(9, 100, 1.5)
    )

  # Join, impute, and run regression on these pairs
  regress.res <-
    joinImputeRegress(
      paired.df = paired.df,
      covars.df = covars.df,
      thresh.list = thresh.list,
      impute.fx = median,
      ci = TRUE
    )

  # Calculate the Cohen's Kappa, first at the primary and then at each of the
  # sensitivity Hgb cutoffs (results are printed, not stored)
  for (hgb.cutoff in c(primary.hgb.cutoff, sens.hgb.cutoffs)) {
    calculateCohenKappa(
      values.x = paired.df$NUM_VAL.x,
      values.y = paired.df$NUM_VAL.y,
      cutoff = hgb.cutoff,
      to.print = TRUE
    )
  }

  # Compute Transfusion Confusion Matrix at a range of PN[2] cutoff values;
  # each element is itself a list holding the result and the cutoff used
  mat.across.cutoffs <-
    lapply(
      c(7.0, 7.5, 8.0, 8.5, 9.0),
      function (pn2.cutoff) {
        list(
          res = transfusionConfusionMatrix(
            value.x = paired.df$NUM_VAL.x,
            value.y = paired.df$NUM_VAL.y,
            cutoffs = c(primary.hgb.cutoff, pn2.cutoff),
            to.print = TRUE,
            to.return = TRUE
          ),
          cutoff = pn2.cutoff
        )
      }
    )

  # And calculate the ROC and P-R curves
  thresh.roc <- calculateThresholdROC(
    paired.df = paired.df,
    to.print = TRUE,
    to.return = TRUE,
    cutoff.by = 0.01)

  # Save out?
  if (! any(is.na(save.fn)) ) {
    # Bind the time cutoff locally under the name used in the saved file, so
    # that save() does not silently pick up a global `primary.cutoff` that
    # may differ from the `time.diff` used above
    primary.cutoff <- time.diff

    save(
      file = file.path(
        Sys.getenv('PICU_LAB_DATA_PATH'),
        paste0(
          Sys.getenv('PICU_LAB_SITE_NAME'),
          '_', save.fn, '_',
          run.date, '.rData'
        )
      ),
      primary.cutoff,
      # paired.df and covars.df are currently excluded from the saved file
      error.grid,
      thresh.list, regress.res, mat.across.cutoffs,
      primary.hgb.cutoff, thresh.roc
    )
  }

  invisible(NULL)
}
Now we run for the POC values using the primary cutoff, allowing all results per patient (if the PROC exists):
# Primary CBC vs ISTAT analysis: only run when the site has ISTAT results
if ('ISTAT' %in% labs.df$PROC_NAME) {
  runAllClinical(
    labs.df = labs.df,
    cohort.df = cohort.df,
    compare.PN = 'ISTAT',
    time.diff = primary.cutoff,
    multi.per.pt = TRUE,
    primary.hgb.cutoff = primary.hgb.cutoff,
    sens.hgb.cutoffs = sens.hgb.cutoffs,
    run.date = run.date,
    save.fn = 'pri_cbc_istat_clinical'
  )
}
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 3802
## Number of non-duplicated first PROC_NAME rows: 3581
## Number of non-duplicated second PROC_NAME rows: 3575
## Number of paired, simultaneous values: 3575
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 3163 (88.5 %)
## Box B: 3575 (100.0 %)
## Box C: 3 (0.1 %)
## Box D: 7 (0.2 %)
## Counts by Area:
## Green Area: 3163 (88.48 %)
## Yellow Area: 402 (11.24 %)
## Red Area: 10 (0.28 %)
## NUmber of unique PN[1] order procedure keys: 3575
## Lactate
## Min. : 0.600
## 1st Qu.: 1.200
## Median : 2.150
## Mean : 3.943
## 3rd Qu.: 4.950
## Max. :19.100
## NA's :3459
## Count (and %) of NAs in Lactate column: 3459 (96.76 %)
## Gluc
## Min. : 31.0
## 1st Qu.: 95.5
## Median :125.0
## Mean :141.9
## 3rd Qu.:175.0
## Max. :440.0
## NA's :3456
## Count (and %) of NAs in Gluc column: 3456 (96.67 %)
## iCal
## Min. :0.42
## 1st Qu.:1.16
## Median :1.25
## Mean :1.26
## 3rd Qu.:1.34
## Max. :2.72
## NA's :12
## Count (and %) of NAs in iCal column: 12 (0.34 %)
## Bicarb
## Min. : 1.60
## 1st Qu.:20.80
## Median :24.50
## Mean :25.04
## 3rd Qu.:28.90
## Max. :58.00
## NA's :35
## Count (and %) of NAs in Bicarb column: 35 (0.98 %)
## pH
## Min. :6.632
## 1st Qu.:7.251
## Median :7.335
## Mean :7.318
## 3rd Qu.:7.401
## Max. :7.884
## NA's :14
## Count (and %) of NAs in pH column: 14 (0.39 %)
## Number (%) of `WELL MATCHED`: 2859 (79.97 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1605 0.6003 0.6442 0.6906 0.9104
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.360e+00 2.599e+00 -2.447 0.01439 *
## NUM_VAL.y 3.444e-03 1.515e-02 0.227 0.82023
## pH 1.030e-01 3.467e-02 2.969 0.00298 **
## Gluc 7.317e-04 3.734e-03 0.196 0.84464
## Bicarb -1.342e-02 6.883e-03 -1.950 0.05118 .
## iCal 1.859e-01 2.434e-01 0.764 0.44511
## Lactate 1.479e-02 5.817e-02 0.254 0.79929
## AGE_PROC -3.294e-06 2.258e-05 -0.146 0.88403
## DEPTPICU 2.445e-01 1.066e-01 2.294 0.02181 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3580.6 on 3574 degrees of freedom
## Residual deviance: 3564.6 on 3566 degrees of freedom
## AIC: 3582.6
##
## Number of Fisher Scoring iterations: 4
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 0.001730022 1.003449830 1.108444456 1.000731926 0.986668058 1.204252603
## Lactate AGE_PROC DEPTPICU
## 1.014899926 0.999996706 1.277027204
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 1.078705e-05 0.288014
## NUM_VAL.y 9.741428e-01 1.033785
## pH 1.035271e+00 1.186077
## Gluc 9.938440e-01 1.008658
## Bicarb 9.734956e-01 1.000130
## iCal 7.503648e-01 1.948778
## Lactate 9.157862e-01 1.159105
## AGE_PROC 9.999528e-01 1.000041
## DEPTPICU 1.035634e+00 1.573119
## Pre-Range Check Length: 3575
## Cutoff value used: 7
## X :: Pos: 0.04 Neg: 0.96
## Y :: Pos: 0.06 Neg: 0.94
## P.pos: 0.0020
## P.neg: 0.9103
## P.obs: 0.96
## P.exp: 0.91
## Kappa: 0.56
## Pre-Range Check Length: 3575
## Cutoff value used: 5
## X :: Pos: 0.00 Neg: 1.00
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0000
## P.neg: 0.9911
## P.obs: 0.99
## P.exp: 0.99
## Kappa: 0.18
## Pre-Range Check Length: 3575
## Cutoff value used: 9
## X :: Pos: 0.20 Neg: 0.80
## Y :: Pos: 0.22 Neg: 0.78
## P.pos: 0.0440
## P.neg: 0.6242
## P.obs: 0.90
## P.exp: 0.67
## Kappa: 0.70
## Total number of input rows: 3575
## Gold Standard:
## Positive: 130 (3.64 %)
## Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 95 (2.6573 %) FP: 103 (2.8811 %)
## FN: 35 (0.9790 %) TN: 3342 (93.4825 %)
## Sens: 0.7308
## Spec: 0.9701
## PPV: 0.4798
## NPV: 0.9896
## FOR (1-NPV): 0.0104
## NNM (1/FOR): 96.4857
##
## Total number of input rows: 3575
## Gold Standard:
## Positive: 130 (3.64 %)
## Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 102 (2.8531 %) FP: 157 (4.3916 %)
## FN: 28 (0.7832 %) TN: 3288 (91.9720 %)
## Sens: 0.7846
## Spec: 0.9544
## PPV: 0.3938
## NPV: 0.9916
## FOR (1-NPV): 0.0084
## NNM (1/FOR): 118.4286
##
## Total number of input rows: 3575
## Gold Standard:
## Positive: 130 (3.64 %)
## Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 117 (3.2727 %) FP: 314 (8.7832 %)
## FN: 13 (0.3636 %) TN: 3131 (87.5804 %)
## Sens: 0.9000
## Spec: 0.9089
## PPV: 0.2715
## NPV: 0.9959
## FOR (1-NPV): 0.0041
## NNM (1/FOR): 241.8462
##
## Total number of input rows: 3575
## Gold Standard:
## Positive: 130 (3.64 %)
## Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 120 (3.3566 %) FP: 418 (11.6923 %)
## FN: 10 (0.2797 %) TN: 3027 (84.6713 %)
## Sens: 0.9231
## Spec: 0.8787
## PPV: 0.2230
## NPV: 0.9967
## FOR (1-NPV): 0.0033
## NNM (1/FOR): 303.7000
##
## Total number of input rows: 3575
## Gold Standard:
## Positive: 130 (3.64 %)
## Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 124 (3.4685 %) FP: 654 (18.2937 %)
## FN: 6 (0.1678 %) TN: 2791 (78.0699 %)
## Sens: 0.9538
## Spec: 0.8102
## PPV: 0.1594
## NPV: 0.9979
## FOR (1-NPV): 0.0021
## NNM (1/FOR): 466.1667
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.955 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.855 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.135 </td>
## <td style="text-align:right;"> 7.8 </td>
## <td style="text-align:right;"> 0.900 </td>
## <td style="text-align:right;"> 0.909 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.272 </td>
## <td style="text-align:right;"> 8.8 </td>
## <td style="text-align:right;"> 0.833 </td>
## <td style="text-align:right;"> 0.784 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).
Now we complete some of the same above measures using different permutations, as sensitivity analyses (same as we did in 02_Analytic_Accuracy.Rmd).
First we change the parameters to require a single value per patient. We run this across both BG and ISTAT pairs (if they exist).
# Sensitivity analysis: one (first) result per patient, for each comparison
# procedure that is present in the labs data
for (proc.option in c('BG', 'ISTAT')) {
  # Skip procedures this site does not have
  if (!(proc.option %in% labs.df$PROC_NAME)) {
    next
  }
  runAllClinical(
    labs.df = labs.df,
    cohort.df = cohort.df,
    compare.PN = proc.option,
    time.diff = primary.cutoff,
    multi.per.pt = FALSE, # This is the change in this section
    primary.hgb.cutoff = primary.hgb.cutoff,
    sens.hgb.cutoffs = sens.hgb.cutoffs,
    run.date = run.date,
    save.fn = paste0('single_pt_cbc_', tolower(proc.option), '_clinical')
  )
}
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 72997
## Number of non-duplicated first PROC_NAME rows: 67141
## Number of non-duplicated second PROC_NAME rows: 67077
## Number of paired, simultaneous values: 9511
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 9233 (97.1 %)
## Box B: 9511 (100.0 %)
## Box C: 3 (0.0 %)
## Box D: 4 (0.0 %)
## Counts by Area:
## Green Area: 9233 (97.08 %)
## Yellow Area: 271 (2.85 %)
## Red Area: 7 (0.07 %)
## NUmber of unique PN[1] order procedure keys: 9511
## Lactate
## Min. : 0.500
## 1st Qu.: 1.100
## Median : 1.600
## Mean : 2.267
## 3rd Qu.: 2.500
## Max. :27.200
## NA's :3217
## Count (and %) of NAs in Lactate column: 3217 (33.82 %)
## Gluc
## Min. : 13.0
## 1st Qu.: 99.0
## Median : 121.0
## Mean : 135.9
## 3rd Qu.: 152.0
## Max. :1464.0
## NA's :796
## Count (and %) of NAs in Gluc column: 796 (8.37 %)
## iCal
## Min. :0.410
## 1st Qu.:1.110
## Median :1.170
## Mean :1.167
## 3rd Qu.:1.220
## Max. :2.600
## NA's :786
## Count (and %) of NAs in iCal column: 786 (8.26 %)
## Bicarb
## Min. : 1.80
## 1st Qu.:21.60
## Median :24.20
## Mean :24.34
## 3rd Qu.:27.00
## Max. :59.50
## NA's :3
## Count (and %) of NAs in Bicarb column: 3 (0.03 %)
## pH
## Min. :6.605
## 1st Qu.:7.333
## Median :7.378
## Mean :7.367
## 3rd Qu.:7.416
## Max. :7.689
## NA's :2
## Count (and %) of NAs in pH column: 2 (0.02 %)
## Number (%) of `WELL MATCHED`: 9079 (95.46 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1144 0.2078 0.3070 0.3571 0.8432
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.174e+01 4.833e+00 2.428 0.01518 *
## NUM_VAL.y 9.355e-02 2.335e-02 4.006 6.17e-05 ***
## pH -1.045e-01 6.420e-02 -1.627 0.10364
## Gluc -8.182e-04 7.226e-04 -1.132 0.25752
## Bicarb 8.562e-03 1.077e-02 0.795 0.42683
## iCal -1.312e+00 4.551e-01 -2.883 0.00394 **
## Lactate 1.638e-02 2.873e-02 0.570 0.56865
## AGE_PROC -5.347e-05 2.465e-05 -2.169 0.03009 *
## DEPTPICU -8.241e-01 1.416e-01 -5.822 5.83e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3515.4 on 9510 degrees of freedom
## Residual deviance: 3395.4 on 9502 degrees of freedom
## AIC: 3413.4
##
## Number of Fisher Scoring iterations: 6
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 1.249565e+05 1.098064e+00 9.007870e-01 9.991822e-01 1.008598e+00 2.693275e-01
## Lactate AGE_PROC DEPTPICU
## 1.016510e+00 9.999465e-01 4.386421e-01
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 10.8417694 1.827967e+09
## NUM_VAL.y 1.0491188 1.149685e+00
## pH 0.7929680 1.019832e+00
## Gluc 0.9978556 1.000701e+00
## Bicarb 0.9878004 1.030392e+00
## iCal 0.1124201 6.734354e-01
## Lactate 0.9641851 1.079449e+00
## AGE_PROC 0.9998985 9.999952e-01
## DEPTPICU 0.3307551 5.763717e-01
## Pre-Range Check Length: 9511
## Cutoff value used: 7
## X :: Pos: 0.02 Neg: 0.98
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0002
## P.neg: 0.9733
## P.obs: 0.99
## P.exp: 0.97
## Kappa: 0.66
## Pre-Range Check Length: 9511
## Cutoff value used: 5
## X :: Pos: 0.00 Neg: 1.00
## Y :: Pos: 0.00 Neg: 1.00
## P.pos: 0.0000
## P.neg: 0.9967
## P.obs: 1.00
## P.exp: 1.00
## Kappa: 0.71
## Pre-Range Check Length: 9511
## Cutoff value used: 9
## X :: Pos: 0.13 Neg: 0.87
## Y :: Pos: 0.08 Neg: 0.92
## P.pos: 0.0109
## P.neg: 0.7970
## P.obs: 0.95
## P.exp: 0.81
## Kappa: 0.71
## Total number of input rows: 9511
## Gold Standard:
## Positive: 164 (1.72 %)
## Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 85 (0.8937 %) FP: 7 (0.0736 %)
## FN: 79 (0.8306 %) TN: 9340 (98.2021 %)
## Sens: 0.5183
## Spec: 0.9993
## PPV: 0.9239
## NPV: 0.9916
## FOR (1-NPV): 0.0084
## NNM (1/FOR): 119.2278
##
## Total number of input rows: 9511
## Gold Standard:
## Positive: 164 (1.72 %)
## Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 130 (1.3668 %) FP: 52 (0.5467 %)
## FN: 34 (0.3575 %) TN: 9295 (97.7289 %)
## Sens: 0.7927
## Spec: 0.9944
## PPV: 0.7143
## NPV: 0.9964
## FOR (1-NPV): 0.0036
## NNM (1/FOR): 274.3824
##
## Total number of input rows: 9511
## Gold Standard:
## Positive: 164 (1.72 %)
## Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 152 (1.5981 %) FP: 151 (1.5876 %)
## FN: 12 (0.1262 %) TN: 9196 (96.6880 %)
## Sens: 0.9268
## Spec: 0.9838
## PPV: 0.5017
## NPV: 0.9987
## FOR (1-NPV): 0.0013
## NNM (1/FOR): 767.3333
##
## Total number of input rows: 9511
## Gold Standard:
## Positive: 164 (1.72 %)
## Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 153 (1.6087 %) FP: 342 (3.5958 %)
## FN: 11 (0.1157 %) TN: 9005 (94.6798 %)
## Sens: 0.9329
## Spec: 0.9634
## PPV: 0.3091
## NPV: 0.9988
## FOR (1-NPV): 0.0012
## NNM (1/FOR): 819.6364
##
## Total number of input rows: 9511
## Gold Standard:
## Positive: 164 (1.72 %)
## Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 155 (1.6297 %) FP: 635 (6.6765 %)
## FN: 9 (0.0946 %) TN: 8712 (91.5992 %)
## Sens: 0.9451
## Spec: 0.9321
## PPV: 0.1962
## NPV: 0.9990
## FOR (1-NPV): 0.0010
## NNM (1/FOR): 969.0000
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.975 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.983 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.074 </td>
## <td style="text-align:right;"> 8.3 </td>
## <td style="text-align:right;"> 0.933 </td>
## <td style="text-align:right;"> 0.969 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.058 </td>
## <td style="text-align:right;"> 7.7 </td>
## <td style="text-align:right;"> 0.947 </td>
## <td style="text-align:right;"> 0.976 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 602 rows containing missing values (geom_point).
## Warning: Removed 602 row(s) containing missing values (geom_path).
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 3802
## Number of non-duplicated first PROC_NAME rows: 3581
## Number of non-duplicated second PROC_NAME rows: 3575
## Number of paired, simultaneous values: 2359
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 2131 (90.3 %)
## Box B: 2359 (100.0 %)
## Box C: 3 (0.1 %)
## Box D: 5 (0.2 %)
## Counts by Area:
## Green Area: 2131 (90.33 %)
## Yellow Area: 220 (9.33 %)
## Red Area: 8 (0.34 %)
## NUmber of unique PN[1] order procedure keys: 2359
## Lactate
## Min. : 0.600
## 1st Qu.: 1.200
## Median : 2.100
## Mean : 3.595
## 3rd Qu.: 4.300
## Max. :16.100
## NA's :2273
## Count (and %) of NAs in Lactate column: 2273 (96.35 %)
## Gluc
## Min. : 31.0
## 1st Qu.: 91.0
## Median :121.0
## Mean :135.1
## 3rd Qu.:167.0
## Max. :339.0
## NA's :2270
## Count (and %) of NAs in Gluc column: 2270 (96.23 %)
## iCal
## Min. :0.420
## 1st Qu.:1.170
## Median :1.250
## Mean :1.258
## 3rd Qu.:1.340
## Max. :2.490
## NA's :7
## Count (and %) of NAs in iCal column: 7 (0.30 %)
## Bicarb
## Min. : 1.60
## 1st Qu.:20.12
## Median :23.70
## Mean :24.17
## 3rd Qu.:27.70
## Max. :56.80
## NA's :21
## Count (and %) of NAs in Bicarb column: 21 (0.89 %)
## pH
## Min. :6.661
## 1st Qu.:7.241
## Median :7.326
## Mean :7.311
## 3rd Qu.:7.396
## Max. :7.884
## NA's :8
## Count (and %) of NAs in pH column: 8 (0.34 %)
## Number (%) of `WELL MATCHED`: 1943 (82.37 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0906 0.5595 0.6016 0.6430 0.8063
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.532e+00 3.384e+00 -1.635 0.1021
## NUM_VAL.y -2.619e-02 1.962e-02 -1.335 0.1819
## pH 9.020e-02 4.468e-02 2.019 0.0435 *
## Gluc 4.373e-03 5.460e-03 0.801 0.4232
## Bicarb -1.026e-02 9.213e-03 -1.114 0.2654
## iCal 3.225e-01 3.406e-01 0.947 0.3437
## Lactate -9.694e-03 7.023e-02 -0.138 0.8902
## AGE_PROC 2.056e-05 2.925e-05 0.703 0.4821
## DEPTPICU 1.263e-01 1.442e-01 0.876 0.3812
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2197.7 on 2358 degrees of freedom
## Residual deviance: 2186.3 on 2350 degrees of freedom
## AIC: 2204.3
##
## Number of Fisher Scoring iterations: 4
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 0.003956617 0.974153798 1.094397656 1.004382158 0.989791192 1.380556839
## Lactate AGE_PROC DEPTPICU
## 0.990353153 1.000020560 1.134594223
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 5.404230e-06 3.147582
## NUM_VAL.y 9.374504e-01 1.012414
## pH 1.001967e+00 1.193919
## Gluc 9.945402e-01 1.016167
## Bicarb 9.721857e-01 1.007957
## iCal 7.133390e-01 2.710512
## Lactate 8.723080e-01 1.164277
## AGE_PROC 9.999638e-01 1.000079
## DEPTPICU 8.538870e-01 1.503257
## Pre-Range Check Length: 2359
## Cutoff value used: 7
## X :: Pos: 0.03 Neg: 0.97
## Y :: Pos: 0.05 Neg: 0.95
## P.pos: 0.0016
## P.neg: 0.9210
## P.obs: 0.97
## P.exp: 0.92
## Kappa: 0.59
## Pre-Range Check Length: 2359
## Cutoff value used: 5
## X :: Pos: 0.00 Neg: 1.00
## Y :: Pos: 0.00 Neg: 1.00
## P.pos: 0.0000
## P.neg: 0.9911
## P.obs: 0.99
## P.exp: 0.99
## Kappa: 0.19
## Pre-Range Check Length: 2359
## Cutoff value used: 9
## X :: Pos: 0.19 Neg: 0.81
## Y :: Pos: 0.20 Neg: 0.80
## P.pos: 0.0367
## P.neg: 0.6535
## P.obs: 0.91
## P.exp: 0.69
## Kappa: 0.72
## Total number of input rows: 2359
## Gold Standard:
## Positive: 80 (3.39 %)
## Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 58 (2.4587 %) FP: 52 (2.2043 %)
## FN: 22 (0.9326 %) TN: 2227 (94.4044 %)
## Sens: 0.7250
## Spec: 0.9772
## PPV: 0.5273
## NPV: 0.9902
## FOR (1-NPV): 0.0098
## NNM (1/FOR): 102.2273
##
## Total number of input rows: 2359
## Gold Standard:
## Positive: 80 (3.39 %)
## Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 62 (2.6282 %) FP: 87 (3.6880 %)
## FN: 18 (0.7630 %) TN: 2192 (92.9207 %)
## Sens: 0.7750
## Spec: 0.9618
## PPV: 0.4161
## NPV: 0.9919
## FOR (1-NPV): 0.0081
## NNM (1/FOR): 122.7778
##
## Total number of input rows: 2359
## Gold Standard:
## Positive: 80 (3.39 %)
## Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 72 (3.0521 %) FP: 174 (7.3760 %)
## FN: 8 (0.3391 %) TN: 2105 (89.2327 %)
## Sens: 0.9000
## Spec: 0.9237
## PPV: 0.2927
## NPV: 0.9962
## FOR (1-NPV): 0.0038
## NNM (1/FOR): 264.1250
##
## Total number of input rows: 2359
## Gold Standard:
## Positive: 80 (3.39 %)
## Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 73 (3.0945 %) FP: 233 (9.8771 %)
## FN: 7 (0.2967 %) TN: 2046 (86.7317 %)
## Sens: 0.9125
## Spec: 0.8978
## PPV: 0.2386
## NPV: 0.9966
## FOR (1-NPV): 0.0034
## NNM (1/FOR): 293.2857
##
## Total number of input rows: 2359
## Gold Standard:
## Positive: 80 (3.39 %)
## Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 76 (3.2217 %) FP: 388 (16.4476 %)
## FN: 4 (0.1696 %) TN: 1891 (80.1611 %)
## Sens: 0.9500
## Spec: 0.8297
## PPV: 0.1638
## NPV: 0.9979
## FOR (1-NPV): 0.0021
## NNM (1/FOR): 473.7500
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.954 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.857 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.126 </td>
## <td style="text-align:right;"> 7.8 </td>
## <td style="text-align:right;"> 0.9 </td>
## <td style="text-align:right;"> 0.924 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.237 </td>
## <td style="text-align:right;"> 8.2 </td>
## <td style="text-align:right;"> 0.8 </td>
## <td style="text-align:right;"> 0.873 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).
Now we change the parameters to alter the cutoff (in minutes) between labs that are counted as “simultaneous”. We revert to allowing multiple values per patient. We run this for both BG and ISTAT pairs (if they exist), across all sens.cutoffs values.
# Sensitivity analysis: vary the "simultaneous" time cutoff, allowing multiple
# results per patient; nothing is saved to file for these runs
for (cutoff in sens.cutoffs) {
  for (proc.option in c('BG', 'ISTAT')) {
    # Skip procedures this site does not have
    if (!(proc.option %in% labs.df$PROC_NAME)) {
      next
    }
    runAllClinical(
      labs.df = labs.df,
      cohort.df = cohort.df,
      compare.PN = proc.option,
      time.diff = cutoff,
      multi.per.pt = TRUE,
      primary.hgb.cutoff = primary.hgb.cutoff,
      sens.hgb.cutoffs = sens.hgb.cutoffs,
      run.date = run.date,
      save.fn = NA
    )
  } # Across BG vs ISTAT
} # Across cutoffs
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 60273
## Number of non-duplicated first PROC_NAME rows: 58865
## Number of non-duplicated second PROC_NAME rows: 58817
## Number of paired, simultaneous values: 58817
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 55736 (94.8 %)
## Box B: 58817 (100.0 %)
## Box C: 39 (0.1 %)
## Box D: 22 (0.0 %)
## Counts by Area:
## Green Area: 55736 (94.76 %)
## Yellow Area: 3020 (5.13 %)
## Red Area: 61 (0.10 %)
## NUmber of unique PN[1] order procedure keys: 58817
## Lactate
## Min. : 0.500
## 1st Qu.: 1.100
## Median : 1.500
## Mean : 2.031
## 3rd Qu.: 2.100
## Max. :27.200
## NA's :14846
## Count (and %) of NAs in Lactate column: 14846 (25.24 %)
## Gluc
## Min. : 13.0
## 1st Qu.: 97.0
## Median : 115.0
## Mean : 127.3
## 3rd Qu.: 140.0
## Max. :3008.0
## NA's :4957
## Count (and %) of NAs in Gluc column: 4957 (8.43 %)
## iCal
## Min. :0.270
## 1st Qu.:1.120
## Median :1.170
## Mean :1.177
## 3rd Qu.:1.230
## Max. :2.740
## NA's :4907
## Count (and %) of NAs in iCal column: 4907 (8.34 %)
## Bicarb
## Min. : 1.80
## 1st Qu.:24.00
## Median :27.40
## Mean :28.22
## 3rd Qu.:31.80
## Max. :82.60
## NA's :4
## Count (and %) of NAs in Bicarb column: 4 (0.01 %)
## pH
## Min. : 6.615
## 1st Qu.: 7.354
## Median : 7.398
## Mean : 7.393
## 3rd Qu.: 7.437
## Max. :37.600
## NA's :4
## Count (and %) of NAs in pH column: 4 (0.01 %)
## Number (%) of `WELL MATCHED`: 54537 (92.72 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0129 0.2454 0.3975 0.4401 1.0351
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.767e+00 5.394e-01 6.984 2.88e-12 ***
## NUM_VAL.y 3.001e-02 8.130e-03 3.691 0.000223 ***
## pH -6.668e-03 6.459e-03 -1.032 0.301899
## Gluc -6.591e-04 2.550e-04 -2.585 0.009750 **
## Bicarb 1.993e-03 2.495e-03 0.799 0.424454
## iCal -2.219e-01 1.616e-01 -1.373 0.169765
## Lactate 9.030e-02 1.250e-02 7.226 4.98e-13 ***
## AGE_PROC -1.055e-04 7.689e-06 -13.719 < 2e-16 ***
## DEPTPICU -9.561e-01 4.807e-02 -19.892 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 30672 on 58816 degrees of freedom
## Residual deviance: 29472 on 58808 degrees of freedom
## AIC: 29490
##
## Number of Fisher Scoring iterations: 6
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 43.2552951 1.0304637 0.9933539 0.9993412 1.0019950 0.8010318
## Lactate AGE_PROC DEPTPICU
## 1.0944984 0.9998945 0.3843909
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 11.0130381 140.1854815
## NUM_VAL.y 1.0142103 1.0470525
## pH 0.9789048 1.0107766
## Gluc 0.9988511 0.9998556
## Bicarb 0.9971292 1.0069301
## iCal 0.5844406 1.1010312
## Lactate 1.0687174 1.1223763
## AGE_PROC 0.9998795 0.9999096
## DEPTPICU 0.3496007 0.4220938
## Pre-Range Check Length: 58817
## Cutoff value used: 7
## X :: Pos: 0.02 Neg: 0.98
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0001
## P.neg: 0.9753
## P.obs: 0.99
## P.exp: 0.98
## Kappa: 0.51
## Pre-Range Check Length: 58817
## Cutoff value used: 5
## X :: Pos: 0.00 Neg: 1.00
## Y :: Pos: 0.00 Neg: 1.00
## P.pos: 0.0000
## P.neg: 0.9985
## P.obs: 1.00
## P.exp: 1.00
## Kappa: 0.37
## Pre-Range Check Length: 58817
## Cutoff value used: 9
## X :: Pos: 0.20 Neg: 0.80
## Y :: Pos: 0.12 Neg: 0.88
## P.pos: 0.0252
## P.neg: 0.6980
## P.obs: 0.91
## P.exp: 0.72
## Kappa: 0.66
## Total number of input rows: 58817
## Gold Standard:
## Positive: 1002 (1.70 %)
## Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 374 (0.6359 %) FP: 87 (0.1479 %)
## FN: 628 (1.0677 %) TN: 57728 (98.1485 %)
## Sens: 0.3733
## Spec: 0.9985
## PPV: 0.8113
## NPV: 0.9892
## FOR (1-NPV): 0.0108
## NNM (1/FOR): 92.9236
##
## Total number of input rows: 58817
## Gold Standard:
## Positive: 1002 (1.70 %)
## Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 665 (1.1306 %) FP: 449 (0.7634 %)
## FN: 337 (0.5730 %) TN: 57366 (97.5330 %)
## Sens: 0.6637
## Spec: 0.9922
## PPV: 0.5969
## NPV: 0.9942
## FOR (1-NPV): 0.0058
## NNM (1/FOR): 171.2255
##
## Total number of input rows: 58817
## Gold Standard:
## Positive: 1002 (1.70 %)
## Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 794 (1.3499 %) FP: 1553 (2.6404 %)
## FN: 208 (0.3536 %) TN: 56262 (95.6560 %)
## Sens: 0.7924
## Spec: 0.9731
## PPV: 0.3383
## NPV: 0.9963
## FOR (1-NPV): 0.0037
## NNM (1/FOR): 271.4904
##
## Total number of input rows: 58817
## Gold Standard:
## Positive: 1002 (1.70 %)
## Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 850 (1.4452 %) FP: 3539 (6.0170 %)
## FN: 152 (0.2584 %) TN: 54276 (92.2794 %)
## Sens: 0.8483
## Spec: 0.9388
## PPV: 0.1937
## NPV: 0.9972
## FOR (1-NPV): 0.0028
## NNM (1/FOR): 358.0789
##
## Total number of input rows: 58817
## Gold Standard:
## Positive: 1002 (1.70 %)
## Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 884 (1.5030 %) FP: 6422 (10.9186 %)
## FN: 118 (0.2006 %) TN: 51393 (87.3778 %)
## Sens: 0.8822
## Spec: 0.8889
## PPV: 0.1210
## NPV: 0.9977
## FOR (1-NPV): 0.0023
## NNM (1/FOR): 436.5339
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.941 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.777 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.157 </td>
## <td style="text-align:right;"> 8.7 </td>
## <td style="text-align:right;"> 0.871 </td>
## <td style="text-align:right;"> 0.910 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.391 </td>
## <td style="text-align:right;"> 8.7 </td>
## <td style="text-align:right;"> 0.623 </td>
## <td style="text-align:right;"> 0.898 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 279
## Number of non-duplicated first PROC_NAME rows: 277
## Number of non-duplicated second PROC_NAME rows: 276
## Number of paired, simultaneous values: 276
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 247 (89.5 %)
## Box B: 276 (100.0 %)
## Box C: 0 (0.0 %)
## Box D: 1 (0.4 %)
## Counts by Area:
## Green Area: 247 (89.49 %)
## Yellow Area: 28 (10.14 %)
## Red Area: 1 (0.36 %)
## NUmber of unique PN[1] order procedure keys: 276
## Lactate
## Min. : 0.600
## 1st Qu.: 1.300
## Median : 2.100
## Mean : 3.701
## 3rd Qu.: 4.000
## Max. :19.100
## NA's :190
## Count (and %) of NAs in Lactate column: 190 (68.84 %)
## Gluc
## Min. : 31.0
## 1st Qu.: 90.0
## Median :125.0
## Mean :137.6
## 3rd Qu.:171.0
## Max. :326.0
## NA's :187
## Count (and %) of NAs in Gluc column: 187 (67.75 %)
## iCal
## Min. :0.800
## 1st Qu.:1.150
## Median :1.230
## Mean :1.239
## 3rd Qu.:1.320
## Max. :2.420
## NA's :2
## Count (and %) of NAs in iCal column: 2 (0.72 %)
## Bicarb
## Min. : 9.80
## 1st Qu.:20.50
## Median :24.70
## Mean :25.23
## 3rd Qu.:29.20
## Max. :54.30
## NA's :1
## Count (and %) of NAs in Bicarb column: 1 (0.36 %)
## pH
## Min. :6.737
## 1st Qu.:7.240
## Median :7.330
## Mean :7.314
## 3rd Qu.:7.402
## Max. :7.577
## Count (and %) of NAs in pH column: 0 (0.00 %)
## Number (%) of `WELL MATCHED`: 225 (81.52 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4999 0.3571 0.5087 0.6614 1.2940
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.335e+01 1.033e+01 -2.260 0.02382 *
## NUM_VAL.y 8.391e-02 5.481e-02 1.531 0.12577
## pH 3.188e-01 1.407e-01 2.266 0.02348 *
## Gluc 2.786e-03 6.128e-03 0.455 0.64933
## Bicarb -6.305e-02 2.563e-02 -2.460 0.01388 *
## iCal 9.825e-01 1.009e+00 0.974 0.33024
## Lactate 3.792e-02 9.010e-02 0.421 0.67384
## AGE_PROC -1.120e-04 8.403e-05 -1.333 0.18256
## DEPTPICU 1.217e+00 3.842e-01 3.169 0.00153 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 264.17 on 275 degrees of freedom
## Residual deviance: 243.85 on 267 degrees of freedom
## AIC: 261.85
##
## Number of Fisher Scoring iterations: 5
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 7.248423e-11 1.087527e+00 1.375528e+00 1.002790e+00 9.388928e-01 2.671011e+00
## Lactate AGE_PROC DEPTPICU
## 1.038650e+00 9.998880e-01 3.378728e+00
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 7.297754e-20 0.03840205
## NUM_VAL.y 9.781522e-01 1.21391908
## pH 1.045631e+00 1.82310980
## Gluc 9.915373e-01 1.01568477
## Bicarb 8.923233e-01 0.98747316
## iCal 4.197485e-01 22.08604616
## Lactate 8.950642e-01 1.29999755
## AGE_PROC 9.997242e-01 1.00005595
## DEPTPICU 1.599281e+00 7.26578660
## Pre-Range Check Length: 276
## Cutoff value used: 7
## X :: Pos: 0.04 Neg: 0.96
## Y :: Pos: 0.05 Neg: 0.95
## P.pos: 0.0018
## P.neg: 0.9149
## P.obs: 0.97
## P.exp: 0.92
## Kappa: 0.65
## Pre-Range Check Length: 276
## Cutoff value used: 5
## X :: Pos: 0.01 Neg: 0.99
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0001
## P.neg: 0.9856
## P.obs: 0.99
## P.exp: 0.99
## Kappa: 0.50
## Pre-Range Check Length: 276
## Cutoff value used: 9
## X :: Pos: 0.20 Neg: 0.80
## Y :: Pos: 0.20 Neg: 0.80
## P.pos: 0.0412
## P.neg: 0.6354
## P.obs: 0.93
## P.exp: 0.68
## Kappa: 0.78
## Total number of input rows: 276
## Gold Standard:
## Positive: 10 (3.62 %)
## Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 8 (2.8986 %) FP: 6 (2.1739 %)
## FN: 2 (0.7246 %) TN: 260 (94.2029 %)
## Sens: 0.8000
## Spec: 0.9774
## PPV: 0.5714
## NPV: 0.9924
## FOR (1-NPV): 0.0076
## NNM (1/FOR): 131.0000
##
## Total number of input rows: 276
## Gold Standard:
## Positive: 10 (3.62 %)
## Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 8 (2.8986 %) FP: 11 (3.9855 %)
## FN: 2 (0.7246 %) TN: 255 (92.3913 %)
## Sens: 0.8000
## Spec: 0.9586
## PPV: 0.4211
## NPV: 0.9922
## FOR (1-NPV): 0.0078
## NNM (1/FOR): 128.5000
##
## Total number of input rows: 276
## Gold Standard:
## Positive: 10 (3.62 %)
## Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 9 (3.2609 %) FP: 25 (9.0580 %)
## FN: 1 (0.3623 %) TN: 241 (87.3188 %)
## Sens: 0.9000
## Spec: 0.9060
## PPV: 0.2647
## NPV: 0.9959
## FOR (1-NPV): 0.0041
## NNM (1/FOR): 242.0000
##
## Total number of input rows: 276
## Gold Standard:
## Positive: 10 (3.62 %)
## Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 9 (3.2609 %) FP: 34 (12.3188 %)
## FN: 1 (0.3623 %) TN: 232 (84.0580 %)
## Sens: 0.9000
## Spec: 0.8722
## PPV: 0.2093
## NPV: 0.9957
## FOR (1-NPV): 0.0043
## NNM (1/FOR): 233.0000
##
## Total number of input rows: 276
## Gold Standard:
## Positive: 10 (3.62 %)
## Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 10 (3.6232 %) FP: 46 (16.6667 %)
## FN: 0 (0.0000 %) TN: 220 (79.7101 %)
## Sens: 1.0000
## Spec: 0.8271
## PPV: 0.1786
## NPV: 1.0000
## FOR (1-NPV): 0.0000
## NNM (1/FOR): Inf
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.970 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.901 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.137 </td>
## <td style="text-align:right;"> 7.8 </td>
## <td style="text-align:right;"> 0.9 </td>
## <td style="text-align:right;"> 0.906 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.197 </td>
## <td style="text-align:right;"> 8.8 </td>
## <td style="text-align:right;"> 1.0 </td>
## <td style="text-align:right;"> 0.803 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 742 rows containing missing values (geom_point).
## Warning: Removed 742 row(s) containing missing values (geom_path).
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 76385
## Number of non-duplicated first PROC_NAME rows: 69438
## Number of non-duplicated second PROC_NAME rows: 69336
## Number of paired, simultaneous values: 69336
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 65674 (94.7 %)
## Box B: 69336 (100.0 %)
## Box C: 47 (0.1 %)
## Box D: 32 (0.0 %)
## Counts by Area:
## Green Area: 65674 (94.72 %)
## Yellow Area: 3583 (5.17 %)
## Red Area: 79 (0.11 %)
## NUmber of unique PN[1] order procedure keys: 69336
## Lactate
## Min. : 0.500
## 1st Qu.: 1.100
## Median : 1.500
## Mean : 2.086
## 3rd Qu.: 2.200
## Max. :27.200
## NA's :17862
## Count (and %) of NAs in Lactate column: 17862 (25.76 %)
## Gluc
## Min. : 13.0
## 1st Qu.: 97.0
## Median : 115.0
## Mean : 127.8
## 3rd Qu.: 141.0
## Max. :3008.0
## NA's :5831
## Count (and %) of NAs in Gluc column: 5831 (8.41 %)
## iCal
## Min. :0.270
## 1st Qu.:1.110
## Median :1.170
## Mean :1.177
## 3rd Qu.:1.230
## Max. :2.740
## NA's :5778
## Count (and %) of NAs in iCal column: 5778 (8.33 %)
## Bicarb
## Min. : 1.80
## 1st Qu.:23.90
## Median :27.30
## Mean :28.08
## 3rd Qu.:31.60
## Max. :82.60
## NA's :6
## Count (and %) of NAs in Bicarb column: 6 (0.01 %)
## pH
## Min. : 6.605
## 1st Qu.: 7.353
## Median : 7.398
## Mean : 7.393
## 3rd Qu.: 7.438
## Max. :37.600
## NA's :5
## Count (and %) of NAs in pH column: 5 (0.01 %)
## Number (%) of `WELL MATCHED`: 64163 (92.54 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1748 0.2595 0.4026 0.4454 0.9866
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.396e+00 5.298e-01 6.410 1.45e-10 ***
## NUM_VAL.y 3.877e-02 7.310e-03 5.304 1.14e-07 ***
## pH -6.129e-03 6.465e-03 -0.948 0.34315
## Gluc -6.284e-04 2.324e-04 -2.704 0.00686 **
## Bicarb 3.092e-03 2.287e-03 1.352 0.17640
## iCal -1.232e-01 1.450e-01 -0.849 0.39579
## Lactate 5.847e-02 9.802e-03 5.965 2.44e-09 ***
## AGE_PROC -9.488e-05 7.025e-06 -13.506 < 2e-16 ***
## DEPTPICU -8.611e-01 4.203e-02 -20.488 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 36803 on 69335 degrees of freedom
## Residual deviance: 35534 on 69327 degrees of freedom
## AIC: 35552
##
## Number of Fisher Scoring iterations: 6
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 29.8409996 1.0395324 0.9938900 0.9993718 1.0030969 0.8841174
## Lactate AGE_PROC DEPTPICU
## 1.0602167 0.9999051 0.4227003
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 7.5722661 91.9670401
## NUM_VAL.y 1.0247728 1.0545641
## pH 0.9799470 1.0115232
## Gluc 0.9989247 0.9998389
## Bicarb 0.9986293 1.0076231
## iCal 0.6662094 1.1762683
## Lactate 1.0405160 1.0812842
## AGE_PROC 0.9998914 0.9999189
## DEPTPICU 0.3890915 0.4587825
## Pre-Range Check Length: 69336
## Cutoff value used: 7
## X :: Pos: 0.02 Neg: 0.98
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0002
## P.neg: 0.9737
## P.obs: 0.99
## P.exp: 0.97
## Kappa: 0.51
## Pre-Range Check Length: 69336
## Cutoff value used: 5
## X :: Pos: 0.00 Neg: 1.00
## Y :: Pos: 0.00 Neg: 1.00
## P.pos: 0.0000
## P.neg: 0.9981
## P.obs: 1.00
## P.exp: 1.00
## Kappa: 0.40
## Pre-Range Check Length: 69336
## Cutoff value used: 9
## X :: Pos: 0.20 Neg: 0.80
## Y :: Pos: 0.12 Neg: 0.88
## P.pos: 0.0250
## P.neg: 0.7001
## P.obs: 0.91
## P.exp: 0.73
## Kappa: 0.67
## Total number of input rows: 69336
## Gold Standard:
## Positive: 1239 (1.79 %)
## Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 476 (0.6865 %) FP: 122 (0.1760 %)
## FN: 763 (1.1004 %) TN: 67975 (98.0371 %)
## Sens: 0.3842
## Spec: 0.9982
## PPV: 0.7960
## NPV: 0.9889
## FOR (1-NPV): 0.0111
## NNM (1/FOR): 90.0891
##
## Total number of input rows: 69336
## Gold Standard:
## Positive: 1239 (1.79 %)
## Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 833 (1.2014 %) FP: 555 (0.8004 %)
## FN: 406 (0.5856 %) TN: 67542 (97.4126 %)
## Sens: 0.6723
## Spec: 0.9918
## PPV: 0.6001
## NPV: 0.9940
## FOR (1-NPV): 0.0060
## NNM (1/FOR): 167.3596
##
## Total number of input rows: 69336
## Gold Standard:
## Positive: 1239 (1.79 %)
## Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 986 (1.4221 %) FP: 1844 (2.6595 %)
## FN: 253 (0.3649 %) TN: 66253 (95.5535 %)
## Sens: 0.7958
## Spec: 0.9729
## PPV: 0.3484
## NPV: 0.9962
## FOR (1-NPV): 0.0038
## NNM (1/FOR): 262.8696
##
## Total number of input rows: 69336
## Gold Standard:
## Positive: 1239 (1.79 %)
## Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 1057 (1.5245 %) FP: 4173 (6.0185 %)
## FN: 182 (0.2625 %) TN: 63924 (92.1945 %)
## Sens: 0.8531
## Spec: 0.9387
## PPV: 0.2021
## NPV: 0.9972
## FOR (1-NPV): 0.0028
## NNM (1/FOR): 352.2308
##
## Total number of input rows: 69336
## Gold Standard:
## Positive: 1239 (1.79 %)
## Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 1099 (1.5850 %) FP: 7557 (10.8991 %)
## FN: 140 (0.2019 %) TN: 60540 (87.3139 %)
## Sens: 0.8870
## Spec: 0.8890
## PPV: 0.1270
## NPV: 0.9977
## FOR (1-NPV): 0.0023
## NNM (1/FOR): 433.4286
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.945 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.820 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.154 </td>
## <td style="text-align:right;"> 8.7 </td>
## <td style="text-align:right;"> 0.875 </td>
## <td style="text-align:right;"> 0.911 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.330 </td>
## <td style="text-align:right;"> 8.9 </td>
## <td style="text-align:right;"> 0.694 </td>
## <td style="text-align:right;"> 0.876 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 5185
## Number of non-duplicated first PROC_NAME rows: 4358
## Number of non-duplicated second PROC_NAME rows: 4332
## Number of paired, simultaneous values: 4332
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 3790 (87.5 %)
## Box B: 4332 (100.0 %)
## Box C: 6 (0.1 %)
## Box D: 11 (0.3 %)
## Counts by Area:
## Green Area: 3790 (87.49 %)
## Yellow Area: 525 (12.12 %)
## Red Area: 17 (0.39 %)
## NUmber of unique PN[1] order procedure keys: 4332
## Lactate
## Min. : 0.600
## 1st Qu.: 1.300
## Median : 2.200
## Mean : 3.855
## 3rd Qu.: 4.725
## Max. :19.100
## NA's :4208
## Count (and %) of NAs in Lactate column: 4208 (97.14 %)
## Gluc
## Min. : 31.00
## 1st Qu.: 97.75
## Median :124.00
## Mean :143.12
## 3rd Qu.:176.50
## Max. :440.00
## NA's :4204
## Count (and %) of NAs in Gluc column: 4204 (97.05 %)
## iCal
## Min. :0.300
## 1st Qu.:1.170
## Median :1.250
## Mean :1.263
## 3rd Qu.:1.340
## Max. :2.720
## NA's :8
## Count (and %) of NAs in iCal column: 8 (0.18 %)
## Bicarb
## Min. : 1.60
## 1st Qu.:21.10
## Median :24.70
## Mean :25.27
## 3rd Qu.:29.00
## Max. :58.00
## NA's :37
## Count (and %) of NAs in Bicarb column: 37 (0.85 %)
## pH
## Min. :6.661
## 1st Qu.:7.261
## Median :7.344
## Mean :7.329
## 3rd Qu.:7.409
## Max. :7.884
## NA's :15
## Count (and %) of NAs in pH column: 15 (0.35 %)
## Number (%) of `WELL MATCHED`: 3317 (76.57 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1564 0.6335 0.6999 0.7515 1.0173
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.833e+00 2.344e+00 -2.915 0.003560 **
## NUM_VAL.y 2.123e-02 1.298e-02 1.636 0.101870
## pH 9.856e-02 3.123e-02 3.156 0.001601 **
## Gluc 1.405e-03 3.427e-03 0.410 0.681811
## Bicarb -2.900e-03 6.084e-03 -0.477 0.633572
## iCal 1.368e-01 2.012e-01 0.680 0.496576
## Lactate 3.236e-02 5.704e-02 0.567 0.570518
## AGE_PROC -3.447e-06 1.938e-05 -0.178 0.858818
## DEPTPICU 3.469e-01 9.129e-02 3.800 0.000145 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4716.9 on 4331 degrees of freedom
## Residual deviance: 4694.4 on 4323 degrees of freedom
## AIC: 4712.4
##
## Number of Fisher Scoring iterations: 4
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 0.001077271 1.021452401 1.103579470 1.001406181 0.997103843 1.146579129
## Lactate AGE_PROC DEPTPICU
## 1.032886742 0.999996553 1.414712977
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 1.093718e-05 0.1076267
## NUM_VAL.y 9.958650e-01 1.0478402
## pH 1.037872e+00 1.1731021
## Gluc 9.950547e-01 1.0086441
## Bicarb 9.853295e-01 1.0091188
## iCal 7.750386e-01 1.7060039
## Lactate 9.332117e-01 1.1754097
## AGE_PROC 9.999588e-01 1.0000348
## DEPTPICU 1.182780e+00 1.6918473
## Pre-Range Check Length: 4332
## Cutoff value used: 7
## X :: Pos: 0.04 Neg: 0.96
## Y :: Pos: 0.06 Neg: 0.94
## P.pos: 0.0022
## P.neg: 0.9068
## P.obs: 0.96
## P.exp: 0.91
## Kappa: 0.52
## Pre-Range Check Length: 4332
## Cutoff value used: 5
## X :: Pos: 0.01 Neg: 0.99
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0000
## P.neg: 0.9880
## P.obs: 0.99
## P.exp: 0.99
## Kappa: 0.23
## Pre-Range Check Length: 4332
## Cutoff value used: 9
## X :: Pos: 0.20 Neg: 0.80
## Y :: Pos: 0.22 Neg: 0.78
## P.pos: 0.0432
## P.neg: 0.6270
## P.obs: 0.89
## P.exp: 0.67
## Kappa: 0.67
## Total number of input rows: 4332
## Gold Standard:
## Positive: 165 (3.81 %)
## Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 112 (2.5854 %) FP: 136 (3.1394 %)
## FN: 53 (1.2235 %) TN: 4031 (93.0517 %)
## Sens: 0.6788
## Spec: 0.9674
## PPV: 0.4516
## NPV: 0.9870
## FOR (1-NPV): 0.0130
## NNM (1/FOR): 77.0566
##
## Total number of input rows: 4332
## Gold Standard:
## Positive: 165 (3.81 %)
## Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 118 (2.7239 %) FP: 197 (4.5476 %)
## FN: 47 (1.0849 %) TN: 3970 (91.6436 %)
## Sens: 0.7152
## Spec: 0.9527
## PPV: 0.3746
## NPV: 0.9883
## FOR (1-NPV): 0.0117
## NNM (1/FOR): 85.4681
##
## Total number of input rows: 4332
## Gold Standard:
## Positive: 165 (3.81 %)
## Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 134 (3.0933 %) FP: 387 (8.9335 %)
## FN: 31 (0.7156 %) TN: 3780 (87.2576 %)
## Sens: 0.8121
## Spec: 0.9071
## PPV: 0.2572
## NPV: 0.9919
## FOR (1-NPV): 0.0081
## NNM (1/FOR): 122.9355
##
## Total number of input rows: 4332
## Gold Standard:
## Positive: 165 (3.81 %)
## Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 137 (3.1625 %) FP: 503 (11.6113 %)
## FN: 28 (0.6464 %) TN: 3664 (84.5799 %)
## Sens: 0.8303
## Spec: 0.8793
## PPV: 0.2141
## NPV: 0.9924
## FOR (1-NPV): 0.0076
## NNM (1/FOR): 131.8571
##
## Total number of input rows: 4332
## Gold Standard:
## Positive: 165 (3.81 %)
## Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 147 (3.3934 %) FP: 804 (18.5596 %)
## FN: 18 (0.4155 %) TN: 3363 (77.6316 %)
## Sens: 0.8909
## Spec: 0.8071
## PPV: 0.1546
## NPV: 0.9947
## FOR (1-NPV): 0.0053
## NNM (1/FOR): 187.8333
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.927 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.829 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.206 </td>
## <td style="text-align:right;"> 8.5 </td>
## <td style="text-align:right;"> 0.867 </td>
## <td style="text-align:right;"> 0.842 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.348 </td>
## <td style="text-align:right;"> 8.8 </td>
## <td style="text-align:right;"> 0.727 </td>
## <td style="text-align:right;"> 0.783 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 90493
## Number of non-duplicated first PROC_NAME rows: 74478
## Number of non-duplicated second PROC_NAME rows: 73790
## Number of paired, simultaneous values: 73790
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 69643 (94.4 %)
## Box B: 73790 (100.0 %)
## Box C: 59 (0.1 %)
## Box D: 51 (0.1 %)
## Counts by Area:
## Green Area: 69643 (94.38 %)
## Yellow Area: 4037 (5.47 %)
## Red Area: 110 (0.15 %)
## NUmber of unique PN[1] order procedure keys: 73790
## Lactate
## Min. : 0.500
## 1st Qu.: 1.100
## Median : 1.500
## Mean : 2.101
## 3rd Qu.: 2.200
## Max. :27.200
## NA's :19066
## Count (and %) of NAs in Lactate column: 19066 (25.84 %)
## Gluc
## Min. : 10.0
## 1st Qu.: 97.0
## Median : 115.0
## Mean : 127.9
## 3rd Qu.: 141.0
## Max. :1849.0
## NA's :5686
## Count (and %) of NAs in Gluc column: 5686 (7.71 %)
## iCal
## Min. :0.270
## 1st Qu.:1.110
## Median :1.170
## Mean :1.177
## 3rd Qu.:1.230
## Max. :2.740
## NA's :5667
## Count (and %) of NAs in iCal column: 5667 (7.68 %)
## Bicarb
## Min. : 1.70
## 1st Qu.:23.80
## Median :27.30
## Mean :28.05
## 3rd Qu.:31.60
## Max. :82.60
## NA's :3
## Count (and %) of NAs in Bicarb column: 3 (0.00 %)
## pH
## Min. :6.621
## 1st Qu.:7.354
## Median :7.398
## Mean :7.393
## 3rd Qu.:7.439
## Max. :7.899
## NA's :4
## Count (and %) of NAs in pH column: 4 (0.01 %)
## Number (%) of `WELL MATCHED`: 67598 (91.61 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5980 0.3082 0.4190 0.4644 1.0330
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.196e+00 1.461e+00 3.557 0.000376 ***
## NUM_VAL.y 2.850e-02 6.490e-03 4.391 1.13e-05 ***
## pH -2.639e-02 1.926e-02 -1.370 0.170597
## Gluc -1.053e-03 2.166e-04 -4.862 1.16e-06 ***
## Bicarb 7.659e-03 2.200e-03 3.482 0.000498 ***
## iCal -5.216e-01 1.301e-01 -4.010 6.07e-05 ***
## Lactate -2.545e-02 6.378e-03 -3.990 6.60e-05 ***
## AGE_PROC -7.473e-05 6.574e-06 -11.367 < 2e-16 ***
## DEPTPICU -6.281e-01 3.632e-02 -17.293 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 42536 on 73789 degrees of freedom
## Residual deviance: 41558 on 73781 degrees of freedom
## AIC: 41576
##
## Number of Fisher Scoring iterations: 5
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 180.6280846 1.0289083 0.9739565 0.9989474 1.0076882 0.5935526
## Lactate AGE_PROC DEPTPICU
## 0.9748693 0.9999253 0.5336092
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 10.3802637 3187.7327061
## NUM_VAL.y 1.0159252 1.0421022
## pH 0.9377869 1.0113209
## Gluc 0.9985288 0.9993779
## Bicarb 1.0033720 1.0120617
## iCal 0.4604422 0.7667025
## Lactate 0.9629330 0.9873234
## AGE_PROC 0.9999124 0.9999382
## DEPTPICU 0.4968084 0.5728326
## Pre-Range Check Length: 73790
## Cutoff value used: 7
## X :: Pos: 0.02 Neg: 0.98
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0002
## P.neg: 0.9725
## P.obs: 0.99
## P.exp: 0.97
## Kappa: 0.48
## Pre-Range Check Length: 73790
## Cutoff value used: 5
## X :: Pos: 0.00 Neg: 1.00
## Y :: Pos: 0.00 Neg: 1.00
## P.pos: 0.0000
## P.neg: 0.9980
## P.obs: 1.00
## P.exp: 1.00
## Kappa: 0.30
## Pre-Range Check Length: 73790
## Cutoff value used: 9
## X :: Pos: 0.20 Neg: 0.80
## Y :: Pos: 0.13 Neg: 0.87
## P.pos: 0.0249
## P.neg: 0.7006
## P.obs: 0.91
## P.exp: 0.73
## Kappa: 0.66
## Total number of input rows: 73790
## Gold Standard:
## Positive: 1357 (1.84 %)
## Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 501 (0.6790 %) FP: 185 (0.2507 %)
## FN: 856 (1.1600 %) TN: 72248 (97.9103 %)
## Sens: 0.3692
## Spec: 0.9974
## PPV: 0.7303
## NPV: 0.9883
## FOR (1-NPV): 0.0117
## NNM (1/FOR): 85.4019
##
## Total number of input rows: 73790
## Gold Standard:
## Positive: 1357 (1.84 %)
## Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 873 (1.1831 %) FP: 684 (0.9270 %)
## FN: 484 (0.6559 %) TN: 71749 (97.2340 %)
## Sens: 0.6433
## Spec: 0.9906
## PPV: 0.5607
## NPV: 0.9933
## FOR (1-NPV): 0.0067
## NNM (1/FOR): 149.2417
##
## Total number of input rows: 73790
## Gold Standard:
## Positive: 1357 (1.84 %)
## Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 1045 (1.4162 %) FP: 2073 (2.8093 %)
## FN: 312 (0.4228 %) TN: 70360 (95.3517 %)
## Sens: 0.7701
## Spec: 0.9714
## PPV: 0.3352
## NPV: 0.9956
## FOR (1-NPV): 0.0044
## NNM (1/FOR): 226.5128
##
## Total number of input rows: 73790
## Gold Standard:
## Positive: 1357 (1.84 %)
## Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 1130 (1.5314 %) FP: 4507 (6.1079 %)
## FN: 227 (0.3076 %) TN: 67926 (92.0531 %)
## Sens: 0.8327
## Spec: 0.9378
## PPV: 0.2005
## NPV: 0.9967
## FOR (1-NPV): 0.0033
## NNM (1/FOR): 300.2335
##
## Total number of input rows: 73790
## Gold Standard:
## Positive: 1357 (1.84 %)
## Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 1181 (1.6005 %) FP: 8057 (10.9188 %)
## FN: 176 (0.2385 %) TN: 64376 (87.2422 %)
## Sens: 0.8703
## Spec: 0.8888
## PPV: 0.1278
## NPV: 0.9973
## FOR (1-NPV): 0.0027
## NNM (1/FOR): 366.7727
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.937 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.816 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.168 </td>
## <td style="text-align:right;"> 8.7 </td>
## <td style="text-align:right;"> 0.858 </td>
## <td style="text-align:right;"> 0.910 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.346 </td>
## <td style="text-align:right;"> 9.4 </td>
## <td style="text-align:right;"> 0.710 </td>
## <td style="text-align:right;"> 0.812 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 11795
## Number of non-duplicated first PROC_NAME rows: 7590
## Number of non-duplicated second PROC_NAME rows: 7017
## Number of paired, simultaneous values: 7017
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
## Box A: 6080 (86.6 %)
## Box B: 7016 (100.0 %)
## Box C: 16 (0.2 %)
## Box D: 32 (0.5 %)
## Counts by Area:
## Green Area: 6080 (86.65 %)
## Yellow Area: 888 (12.65 %)
## Red Area: 48 (0.68 %)
## NUmber of unique PN[1] order procedure keys: 7017
## Lactate
## Min. : 0.600
## 1st Qu.: 1.300
## Median : 2.100
## Mean : 3.832
## 3rd Qu.: 3.975
## Max. :19.100
## NA's :6891
## Count (and %) of NAs in Lactate column: 6891 (98.20 %)
## Gluc
## Min. : 31.0
## 1st Qu.:105.5
## Median :136.0
## Mean :151.3
## 3rd Qu.:183.0
## Max. :501.0
## NA's :6886
## Count (and %) of NAs in Gluc column: 6886 (98.13 %)
## iCal
## Min. :0.300
## 1st Qu.:1.180
## Median :1.260
## Mean :1.272
## 3rd Qu.:1.350
## Max. :2.500
## NA's :19
## Count (and %) of NAs in iCal column: 19 (0.27 %)
## Bicarb
## Min. : 1.60
## 1st Qu.:21.90
## Median :25.70
## Mean :26.01
## 3rd Qu.:29.80
## Max. :63.40
## NA's :42
## Count (and %) of NAs in Bicarb column: 42 (0.60 %)
## pH
## Min. :6.739
## 1st Qu.:7.285
## Median :7.358
## Mean :7.349
## 3rd Qu.:7.422
## Max. :7.734
## NA's :16
## Count (and %) of NAs in pH column: 16 (0.23 %)
## Number (%) of `WELL MATCHED`: 4825 (68.76 %)
##
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb +
## iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>%
## dplyr::mutate(pH = pH * 10))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8197 -1.4375 0.8212 0.8978 1.0222
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.759e-01 1.865e+00 0.255 0.798585
## NUM_VAL.y -1.125e-02 9.609e-03 -1.171 0.241533
## pH 8.349e-04 2.457e-02 0.034 0.972890
## Gluc -1.881e-03 3.007e-03 -0.626 0.531633
## Bicarb 3.330e-03 4.513e-03 0.738 0.460599
## iCal 1.925e-01 1.543e-01 1.248 0.212158
## Lactate 5.967e-02 5.688e-02 1.049 0.294176
## AGE_PROC 3.983e-05 1.453e-05 2.741 0.006128 **
## DEPTPICU 2.250e-01 6.707e-02 3.355 0.000793 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8715.0 on 7016 degrees of freedom
## Residual deviance: 8670.4 on 7008 degrees of freedom
## AIC: 8688.4
##
## Number of Fisher Scoring iterations: 4
##
## (Intercept) NUM_VAL.y pH Gluc Bicarb iCal
## 1.6095155 0.9888090 1.0008353 0.9981211 1.0033357 1.2122396
## Lactate AGE_PROC DEPTPICU
## 1.0614825 1.0000398 1.2523838
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.04180621 62.676110
## NUM_VAL.y 0.97036823 1.007623
## pH 0.95368861 1.050121
## Gluc 0.99224111 1.004139
## Bicarb 0.99452564 1.012281
## iCal 0.89698985 1.642481
## Lactate 0.95883488 1.203738
## AGE_PROC 1.00001147 1.000068
## DEPTPICU 1.09811576 1.428402
## Pre-Range Check Length: 7017
## Cutoff value used: 7
## X :: Pos: 0.03 Neg: 0.97
## Y :: Pos: 0.04 Neg: 0.96
## P.pos: 0.0014
## P.neg: 0.9246
## P.obs: 0.96
## P.exp: 0.93
## Kappa: 0.45
## Pre-Range Check Length: 7017
## Cutoff value used: 5
## X :: Pos: 0.01 Neg: 0.99
## Y :: Pos: 0.01 Neg: 0.99
## P.pos: 0.0000
## P.neg: 0.9892
## P.obs: 0.99
## P.exp: 0.99
## Kappa: 0.21
## Pre-Range Check Length: 7017
## Cutoff value used: 9
## X :: Pos: 0.17 Neg: 0.83
## Y :: Pos: 0.19 Neg: 0.81
## P.pos: 0.0338
## P.neg: 0.6655
## P.obs: 0.88
## P.exp: 0.70
## Kappa: 0.61
## Total number of input rows: 7017
## Gold Standard:
## Positive: 228 (3.25 %)
## Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.0
## TP: 127 (1.8099 %) FP: 184 (2.6222 %)
## FN: 101 (1.4394 %) TN: 6605 (94.1285 %)
## Sens: 0.5570
## Spec: 0.9729
## PPV: 0.4084
## NPV: 0.9849
## FOR (1-NPV): 0.0151
## NNM (1/FOR): 66.3960
##
## Total number of input rows: 7017
## Gold Standard:
## Positive: 228 (3.25 %)
## Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 7.5
## TP: 134 (1.9096 %) FP: 278 (3.9618 %)
## FN: 94 (1.3396 %) TN: 6511 (92.7889 %)
## Sens: 0.5877
## Spec: 0.9591
## PPV: 0.3252
## NPV: 0.9858
## FOR (1-NPV): 0.0142
## NNM (1/FOR): 70.2660
##
## Total number of input rows: 7017
## Gold Standard:
## Positive: 228 (3.25 %)
## Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.0
## TP: 158 (2.2517 %) FP: 561 (7.9949 %)
## FN: 70 (0.9976 %) TN: 6228 (88.7559 %)
## Sens: 0.6930
## Spec: 0.9174
## PPV: 0.2197
## NPV: 0.9889
## FOR (1-NPV): 0.0111
## NNM (1/FOR): 89.9714
##
## Total number of input rows: 7017
## Gold Standard:
## Positive: 228 (3.25 %)
## Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 8.5
## TP: 172 (2.4512 %) FP: 742 (10.5743 %)
## FN: 56 (0.7981 %) TN: 6047 (86.1764 %)
## Sens: 0.7544
## Spec: 0.8907
## PPV: 0.1882
## NPV: 0.9908
## FOR (1-NPV): 0.0092
## NNM (1/FOR): 108.9821
##
## Total number of input rows: 7017
## Gold Standard:
## Positive: 228 (3.25 %)
## Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0 PN[2]: 9.0
## TP: 184 (2.6222 %) FP: 1183 (16.8591 %)
## FN: 44 (0.6270 %) TN: 5606 (79.8917 %)
## Sens: 0.8070
## Spec: 0.8257
## PPV: 0.1346
## NPV: 0.9922
## FOR (1-NPV): 0.0078
## NNM (1/FOR): 128.4091
##
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> AUROC </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.886 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.816 </td>
## </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
## <thead>
## <tr>
## <th style="text-align:left;"> CBC Cutoff </th>
## <th style="text-align:right;"> Distance </th>
## <th style="text-align:right;"> Cutoff </th>
## <th style="text-align:right;"> Sens </th>
## <th style="text-align:right;"> Spec </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;"> High (7.0 g/dL) </td>
## <td style="text-align:right;"> 0.260 </td>
## <td style="text-align:right;"> 8.8 </td>
## <td style="text-align:right;"> 0.807 </td>
## <td style="text-align:right;"> 0.826 </td>
## </tr>
## <tr>
## <td style="text-align:left;"> Low (5.0 g/dL) </td>
## <td style="text-align:right;"> 0.347 </td>
## <td style="text-align:right;"> 8.8 </td>
## <td style="text-align:right;"> 0.711 </td>
## <td style="text-align:right;"> 0.808 </td>
## </tr>
## </tbody>
## </table>
## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).